In [ ]:
# Basic imports
from Allohubpy import SAtraj
from Allohubpy import Overlap
from Allohubpy import SANetwork
from Allohubpy import TrajProcessor
from Allohubpy.plotter import Allohub_plots
import numpy as np
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests

Comparing advatatges of randomized vs non randomized blocks¶

In this example the trajectories will be loaded first and encoded into two different alphabets (M32K25 and 3DI). To ensure faster encoding times removing the water molecules as well as removing unnecessary atoms will speed up the calculations.

The Structural Alphabet handled is initialized and the data is loaded. The package come by default with the M32K25 alphabet as well as the 3DI ones but other alphabets can be provided as a list of possible tokens.

We will use skip 100 to skip the frist 100 frames of each trajectorie (corresponding to 1ns) as equilibration. The argument stride will extract every 10th frame producing an spacing of 1ns between frames.

Trajectory encoding¶

The first step is to convert the MD trajectories to the structural alphabet of choice

In [3]:
# Other alphabets can be easily added to the analysis using the AbsEncoder class. 
# Absencoder takes

# Encoder for 3DI
enc_3di = TrajProcessor.Encoder3DI("encoded_NtrC_repl1_3di.sa")

# Encoder for M32K25
enc_mk = TrajProcessor.SAEncoder("encoded_NtrC_repl1_mk.sa")

# Trajectory is saved every 10 ps. with stride = 10 only the 1-th frames will be processed producing an spacing of 100 ps between frames
# We skip 100 frames -> 1ns as extra equilibration

## Load repl1 of condition 1
enc_3di.load_trajectory(topology="data_NtrC/NtrC_inactive_nowat.pdb", mdtraj="data_NtrC/NtrC_inactive_repl1.xtc", skip=100, stride=10)
enc_3di.encode()

enc_mk.load_trajectory(topology="data_NtrC/NtrC_inactive_nowat.pdb", mdtraj="data_NtrC/NtrC_inactive_repl1.xtc", skip=100, stride=10)
enc_mk.encode()

print("Encoded NtrC inactive repl1")

## Load repl2 of condition 1
enc_3di.set_output_file("encoded_NtrC_repl2_3di.sa")
enc_mk.set_output_file("encoded_NtrC_repl2_mk.sa")

enc_3di.load_trajectory(topology="data_NtrC/NtrC_inactive_nowat.pdb", mdtraj="data_NtrC/NtrC_inactive_repl2.xtc", skip=100, stride=10)
enc_3di.encode()

enc_mk.load_trajectory(topology="data_NtrC/NtrC_inactive_nowat.pdb", mdtraj="data_NtrC/NtrC_inactive_repl2.xtc", skip=100, stride=10)
enc_mk.encode()

print("Encoded NtrC inactive repl2")

## Load repl3 of condition 1
enc_3di.set_output_file("encoded_NtrC_repl3_3di.sa")
enc_mk.set_output_file("encoded_NtrC_repl3_mk.sa")

enc_3di.load_trajectory(topology="data_NtrC/NtrC_inactive_nowat.pdb", mdtraj="data_NtrC/NtrC_inactive_repl3.xtc", skip=100, stride=10)
enc_3di.encode()

enc_mk.load_trajectory(topology="data_NtrC/NtrC_inactive_nowat.pdb", mdtraj="data_NtrC/NtrC_inactive_repl3.xtc", skip=100, stride=10)
enc_mk.encode()

print("Encoded NtrC inactive repl3")

## Load repl1 of condition 2
enc_3di.set_output_file("encoded_NtrC_active_repl1_3di.sa")
enc_mk.set_output_file("encoded_NtrC_active_repl1_mk.sa")

enc_3di.load_trajectory(topology="data_NtrC/NtrC_active_nowat.pdb", mdtraj="data_NtrC/NtrC_active_repl1.xtc", skip=100, stride=10)
enc_3di.encode()

enc_mk.load_trajectory(topology="data_NtrC/NtrC_active_nowat.pdb", mdtraj="data_NtrC/NtrC_active_repl1.xtc", skip=100, stride=10)
enc_mk.encode()

print("Encoded NtrC active repl1")

## Load repl2 of condition 2
enc_3di.set_output_file("encoded_NtrC_active_repl2_3di.sa")
enc_mk.set_output_file("encoded_NtrC_active_repl2_mk.sa")

enc_3di.load_trajectory(topology="data_NtrC/NtrC_active_nowat.pdb", mdtraj="data_NtrC/NtrC_active_repl2.xtc", skip=100, stride=10)
enc_3di.encode()

enc_mk.load_trajectory(topology="data_NtrC/NtrC_active_nowat.pdb", mdtraj="data_NtrC/NtrC_active_repl2.xtc", skip=100, stride=10)
enc_mk.encode()

print("Encoded NtrC active repl2")

## Load repl3 of condition 3
enc_3di.set_output_file("encoded_NtrC_active_repl3_3di.sa")
enc_mk.set_output_file("encoded_NtrC_active_repl3_mk.sa")

enc_3di.load_trajectory(topology="data_NtrC/NtrC_active_nowat.pdb", mdtraj="data_NtrC/NtrC_active_repl3.xtc", skip=100, stride=10)
enc_3di.encode()

enc_mk.load_trajectory(topology="data_NtrC/NtrC_active_nowat.pdb", mdtraj="data_NtrC/NtrC_active_repl3.xtc", skip=100, stride=10)
enc_mk.encode()

print("Encoded NtrC active repl3")
Encoded NtrC inactive repl1
Encoded NtrC inactive repl2
Encoded NtrC inactive repl3
Encoded NtrC active repl1
Encoded NtrC active repl2
Encoded NtrC active repl3

To illustrate the improvements of the improvements of the methodology. Different block sizes will be used for the encoding of the trajectories.

In [4]:
# Initialize Structural Alphabet trajectory handler
print("Initialize Structural Alphabet trajectory handler")

# Set seeds for reproducibility
seed = 42  # Replace with any integer
import random
np.random.seed(seed)
random.seed(seed)

# With randomization
sa_ntrc_active_100_1 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_100_2 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_100_3 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_inactive_100_1 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_100_2 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_100_3 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_active_10_1 = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_10_2 = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_10_3 = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_inactive_10_1 = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_10_2 = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_10_3 = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_active_50_1 = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_50_2 = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_50_3 = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_inactive_50_1 = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_50_2 = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_50_3 = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])

# Without randomization
sa_ntrc_active_100_1_norand = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_100_2_norand = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_100_3_norand = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_inactive_100_1_norand = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_100_2_norand = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_100_3_norand = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_active_10_1_norand = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_10_2_norand = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_10_3_norand = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_inactive_10_1_norand = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_10_2_norand = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_10_3_norand = SAtraj.SAtraj(block_size=10, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_active_50_1_norand = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_50_2_norand = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_50_3_norand = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_inactive_50_1_norand = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_50_2_norand = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_50_3_norand = SAtraj.SAtraj(block_size=50, alphabet=SAtraj.ALPHABETS["M32K25"])

# Load encoded data into the model
print("Load encoded data into the model")

sa_ntrc_active_100_1.load_data("data_NtrC/encoded_NtrC_active_repl1_mk.sa")
sa_ntrc_active_100_2.load_data("data_NtrC/encoded_NtrC_active_repl2_mk.sa")
sa_ntrc_active_100_3.load_data("data_NtrC/encoded_NtrC_active_repl3_mk.sa")

sa_ntrc_inactive_100_1.load_data("data_NtrC/encoded_NtrC_inactive_repl1_mk.sa")
sa_ntrc_inactive_100_2.load_data("data_NtrC/encoded_NtrC_inactive_repl2_mk.sa")
sa_ntrc_inactive_100_3.load_data("data_NtrC/encoded_NtrC_inactive_repl3_mk.sa")

sa_ntrc_active_10_1.load_data("data_NtrC/encoded_NtrC_active_repl1_mk.sa")
sa_ntrc_active_10_2.load_data("data_NtrC/encoded_NtrC_active_repl2_mk.sa")
sa_ntrc_active_10_3.load_data("data_NtrC/encoded_NtrC_active_repl3_mk.sa")

sa_ntrc_inactive_10_1.load_data("data_NtrC/encoded_NtrC_inactive_repl1_mk.sa")
sa_ntrc_inactive_10_2.load_data("data_NtrC/encoded_NtrC_inactive_repl2_mk.sa")
sa_ntrc_inactive_10_3.load_data("data_NtrC/encoded_NtrC_inactive_repl3_mk.sa")

sa_ntrc_active_50_1.load_data("data_NtrC/encoded_NtrC_active_repl1_mk.sa")
sa_ntrc_active_50_2.load_data("data_NtrC/encoded_NtrC_active_repl2_mk.sa")
sa_ntrc_active_50_3.load_data("data_NtrC/encoded_NtrC_active_repl3_mk.sa")

sa_ntrc_inactive_50_1.load_data("data_NtrC/encoded_NtrC_inactive_repl1_mk.sa")
sa_ntrc_inactive_50_2.load_data("data_NtrC/encoded_NtrC_inactive_repl2_mk.sa")
sa_ntrc_inactive_50_3.load_data("data_NtrC/encoded_NtrC_inactive_repl3_mk.sa")

sa_ntrc_active_100_1_norand.load_data("data_NtrC/encoded_NtrC_active_repl1_mk.sa", randomize=False)
sa_ntrc_active_100_2_norand.load_data("data_NtrC/encoded_NtrC_active_repl2_mk.sa", randomize=False)
sa_ntrc_active_100_3_norand.load_data("data_NtrC/encoded_NtrC_active_repl3_mk.sa", randomize=False)

sa_ntrc_inactive_100_1_norand.load_data("data_NtrC/encoded_NtrC_inactive_repl1_mk.sa", randomize=False)
sa_ntrc_inactive_100_2_norand.load_data("data_NtrC/encoded_NtrC_inactive_repl2_mk.sa", randomize=False)
sa_ntrc_inactive_100_3_norand.load_data("data_NtrC/encoded_NtrC_inactive_repl3_mk.sa", randomize=False)

sa_ntrc_active_10_1_norand.load_data("data_NtrC/encoded_NtrC_active_repl1_mk.sa", randomize=False)
sa_ntrc_active_10_2_norand.load_data("data_NtrC/encoded_NtrC_active_repl2_mk.sa", randomize=False)
sa_ntrc_active_10_3_norand.load_data("data_NtrC/encoded_NtrC_active_repl3_mk.sa", randomize=False)

sa_ntrc_inactive_10_1_norand.load_data("data_NtrC/encoded_NtrC_inactive_repl1_mk.sa", randomize=False)
sa_ntrc_inactive_10_2_norand.load_data("data_NtrC/encoded_NtrC_inactive_repl2_mk.sa", randomize=False)
sa_ntrc_inactive_10_3_norand.load_data("data_NtrC/encoded_NtrC_inactive_repl3_mk.sa", randomize=False)

sa_ntrc_active_50_1_norand.load_data("data_NtrC/encoded_NtrC_active_repl1_mk.sa", randomize=False)
sa_ntrc_active_50_2_norand.load_data("data_NtrC/encoded_NtrC_active_repl2_mk.sa", randomize=False)
sa_ntrc_active_50_3_norand.load_data("data_NtrC/encoded_NtrC_active_repl3_mk.sa", randomize=False)

sa_ntrc_inactive_50_1_norand.load_data("data_NtrC/encoded_NtrC_inactive_repl1_mk.sa", randomize=False)
sa_ntrc_inactive_50_2_norand.load_data("data_NtrC/encoded_NtrC_inactive_repl2_mk.sa", randomize=False)
sa_ntrc_inactive_50_3_norand.load_data("data_NtrC/encoded_NtrC_inactive_repl3_mk.sa", randomize=False)
Initialize Structural Alphabet trajectory handler
Load encoded data into the model

Trajectory examination¶

One can examine the encoded structure string as well as all other analysis using the provided plotting functions.

Alternatively, one can addapt the provided plotting functions for other applications. They are all locate din the file Allohub_plots.py

To display the plots the argument action = "show" should be used while for saving to a file it should be action="save" If the save option is provided the name of the file can be specified with name="my_name.png". The format of the image will depend on the format specified in the file name.

In [5]:
# Plot the randomized trajectory of the Structural Alphabet trajectory of NtrC active repl1
Allohub_plots.plot_SA_traj(sa_ntrc_active_100_1_norand.get_int_traj(), SAtraj.ALPHABETS["M32K25"], action="show")
In [6]:
# Plot the randomized trajectory of the Structural Alphabet  trajectory of NtrC inactive repl1
Allohub_plots.plot_SA_traj(sa_ntrc_inactive_100_1_norand.get_int_traj(), SAtraj.ALPHABETS["M32K25"], action="show")

Mutual information analysis¶

Now we will proceed to compute the mutual information for each block. Trajectories with block 100 contain 9 blocks of 100 ns each. Trajectories with block size 50 contain 19 blocks of 50 ns each. Trajectories with block size 10 contains 99 blocks of 10 ns each.

In [7]:
print("Calculate the MI information")
# One can specify the number of workers to parallelize the process. max_workers=None would use all available resources.

mi_ntrc_inactive_100_1_norand = sa_ntrc_inactive_100_1_norand.compute_mis(max_workers=7)
print("NtrC inactive, no randomization, block size 100, repl 1 finished")
mi_ntrc_inactive_100_2_norand = sa_ntrc_inactive_100_2_norand.compute_mis(max_workers=7)
print("NtrC inactive, no randomization, block size 100, repl 2 finished")
mi_ntrc_inactive_100_3_norand = sa_ntrc_inactive_100_3_norand.compute_mis(max_workers=7)
print("NtrC inactive, no randomization, block size 100, repl 3 finished")

mi_ntrc_inactive_50_1_norand = sa_ntrc_inactive_50_1_norand.compute_mis(max_workers=7)
print("NtrC inactive, no randomization, block size 50, repl 1 finished")
mi_ntrc_inactive_50_2_norand = sa_ntrc_inactive_50_2_norand.compute_mis(max_workers=7)
print("NtrC inactive, no randomization, block size 50, repl 2 finished")
mi_ntrc_inactive_50_3_norand = sa_ntrc_inactive_50_3_norand.compute_mis(max_workers=7)
print("NtrC inactive, no randomization, block size 50, repl 3 finished")

mi_ntrc_inactive_10_1_norand = sa_ntrc_inactive_10_1_norand.compute_mis(max_workers=7)
print("NtrC inactive, no randomization, block size 10, repl 1 finished")
mi_ntrc_inactive_10_2_norand = sa_ntrc_inactive_10_2_norand.compute_mis(max_workers=7)
print("NtrC inactive, no randomization, block size 10, repl 2 finished")
mi_ntrc_inactive_10_3_norand = sa_ntrc_inactive_10_3_norand.compute_mis(max_workers=7)
print("NtrC inactive, no randomization, block size 10, repl 3 finished")

mi_ntrc_active_100_1_norand = sa_ntrc_active_100_1_norand.compute_mis(max_workers=7)
print("NtrC active, no randomization, block size 100, repl 1 finished")
mi_ntrc_active_100_2_norand = sa_ntrc_active_100_2_norand.compute_mis(max_workers=7)
print("NtrC active, no randomization, block size 100, repl 2 finished")
mi_ntrc_active_100_3_norand = sa_ntrc_active_100_3_norand.compute_mis(max_workers=7)
print("NtrC active, no randomization, block size 100, repl 3 finished")

mi_ntrc_active_50_1_norand = sa_ntrc_active_50_1_norand.compute_mis(max_workers=7)
print("NtrC active, no randomization, block size 50, repl 1 finished")
mi_ntrc_active_50_2_norand = sa_ntrc_active_50_2_norand.compute_mis(max_workers=7)
print("NtrC active, no randomization, block size 50, repl 2 finished")
mi_ntrc_active_50_3_norand = sa_ntrc_active_50_3_norand.compute_mis(max_workers=7)
print("NtrC active, no randomization, block size 50, repl 3 finished")

mi_ntrc_active_10_1_norand = sa_ntrc_active_10_1_norand.compute_mis(max_workers=7)
print("NtrC active, no randomization, block size 10, repl 1 finished")
mi_ntrc_active_10_2_norand = sa_ntrc_active_10_2_norand.compute_mis(max_workers=7)
print("NtrC active, no randomization, block size 10, repl 2 finished")
mi_ntrc_active_10_3_norand = sa_ntrc_active_10_3_norand.compute_mis(max_workers=7)
print("NtrC active, no randomization, block size 10, repl 3 finished")

mi_ntrc_inactive_100_1 = sa_ntrc_inactive_100_1.compute_mis(max_workers=7)
print("NtrC inactive, randomization, block size 100, repl 1 finished")
mi_ntrc_inactive_100_2 = sa_ntrc_inactive_100_2.compute_mis(max_workers=7)
print("NtrC inactive, randomization, block size 100, repl 2 finished")
mi_ntrc_inactive_100_3 = sa_ntrc_inactive_100_3.compute_mis(max_workers=7)
print("NtrC inactive, randomization, block size 100, repl 3 finished")

mi_ntrc_inactive_50_1 = sa_ntrc_inactive_50_1.compute_mis(max_workers=7)
print("NtrC inactive, randomization, block size 50, repl 1 finished")
mi_ntrc_inactive_50_2 = sa_ntrc_inactive_50_2.compute_mis(max_workers=7)
print("NtrC inactive, randomization, block size 50, repl 2 finished")
mi_ntrc_inactive_50_3 = sa_ntrc_inactive_50_3.compute_mis(max_workers=7)
print("NtrC inactive, randomization, block size 50, repl 3 finished")

mi_ntrc_inactive_10_1 = sa_ntrc_inactive_10_1.compute_mis(max_workers=7)
print("NtrC inactive, randomization, block size 10, repl 1 finished")
mi_ntrc_inactive_10_2 = sa_ntrc_inactive_10_2.compute_mis(max_workers=7)
print("NtrC inactive, randomization, block size 10, repl 2 finished")
mi_ntrc_inactive_10_3 = sa_ntrc_inactive_10_3.compute_mis(max_workers=7)
print("NtrC inactive, randomization, block size 10, repl 3 finished")

mi_ntrc_active_100_1 = sa_ntrc_active_100_1.compute_mis(max_workers=7)
print("NtrC active, randomization, block size 100, repl 1 finished")
mi_ntrc_active_100_2 = sa_ntrc_active_100_2.compute_mis(max_workers=7)
print("NtrC active, randomization, block size 100, repl 2 finished")
mi_ntrc_active_100_3 = sa_ntrc_active_100_3.compute_mis(max_workers=7)
print("NtrC active, randomization, block size 100, repl 3 finished")

mi_ntrc_active_50_1 = sa_ntrc_active_50_1.compute_mis(max_workers=7)
print("NtrC active, randomization, block size 50, repl 1 finished")
mi_ntrc_active_50_2 = sa_ntrc_active_50_2.compute_mis(max_workers=7)
print("NtrC active, randomization, block size 50, repl 2 finished")
mi_ntrc_active_50_3 = sa_ntrc_active_50_3.compute_mis(max_workers=7)
print("NtrC active, randomization, block size 50, repl 3 finished")

mi_ntrc_active_10_1 = sa_ntrc_active_10_1.compute_mis(max_workers=7)
print("NtrC active, randomization, block size 10, repl 1 finished")
mi_ntrc_active_10_2 = sa_ntrc_active_10_2.compute_mis(max_workers=7)
print("NtrC active, randomization, block size 10, repl 2 finished")
mi_ntrc_active_10_3 = sa_ntrc_active_10_3.compute_mis(max_workers=7)
print("NtrC active, randomization, block size 10, repl 3 finished")
Calculate the MI information
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:32<00:00,  3.62s/block]
TRAJECTORY COMPLETED
NtrC inactive, no randomization, block size 100, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00,  3.44s/block]
TRAJECTORY COMPLETED
NtrC inactive, no randomization, block size 100, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00,  3.69s/block]
TRAJECTORY COMPLETED
NtrC inactive, no randomization, block size 100, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00,  2.26s/block]
TRAJECTORY COMPLETED
NtrC inactive, no randomization, block size 50, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:39<00:00,  2.08s/block]
TRAJECTORY COMPLETED
NtrC inactive, no randomization, block size 50, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00,  2.17s/block]
TRAJECTORY COMPLETED
NtrC inactive, no randomization, block size 50, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:50<00:00,  1.72s/block]
TRAJECTORY COMPLETED
NtrC inactive, no randomization, block size 10, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:40<00:00,  1.62s/block]
TRAJECTORY COMPLETED
NtrC inactive, no randomization, block size 10, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:49<00:00,  1.71s/block]
TRAJECTORY COMPLETED
NtrC inactive, no randomization, block size 10, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00,  3.67s/block]
TRAJECTORY COMPLETED
NtrC active, no randomization, block size 100, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:31<00:00,  3.55s/block]
TRAJECTORY COMPLETED
NtrC active, no randomization, block size 100, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:32<00:00,  3.58s/block]
TRAJECTORY COMPLETED
NtrC active, no randomization, block size 100, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00,  2.20s/block]
TRAJECTORY COMPLETED
NtrC active, no randomization, block size 50, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00,  2.17s/block]
TRAJECTORY COMPLETED
NtrC active, no randomization, block size 50, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00,  2.16s/block]
TRAJECTORY COMPLETED
NtrC active, no randomization, block size 50, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:39<00:00,  1.61s/block]
TRAJECTORY COMPLETED
NtrC active, no randomization, block size 10, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:46<00:00,  1.68s/block]
TRAJECTORY COMPLETED
NtrC active, no randomization, block size 10, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:41<00:00,  1.63s/block]
TRAJECTORY COMPLETED
NtrC active, no randomization, block size 10, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:29<00:00,  3.26s/block]
TRAJECTORY COMPLETED
NtrC inactive, randomization, block size 100, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00,  3.69s/block]
TRAJECTORY COMPLETED
NtrC inactive, randomization, block size 100, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:31<00:00,  3.52s/block]
TRAJECTORY COMPLETED
NtrC inactive, randomization, block size 100, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00,  2.22s/block]
TRAJECTORY COMPLETED
NtrC inactive, randomization, block size 50, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00,  2.22s/block]
TRAJECTORY COMPLETED
NtrC inactive, randomization, block size 50, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00,  2.21s/block]
TRAJECTORY COMPLETED
NtrC inactive, randomization, block size 50, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:41<00:00,  1.63s/block]
TRAJECTORY COMPLETED
NtrC inactive, randomization, block size 10, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:42<00:00,  1.64s/block]
TRAJECTORY COMPLETED
NtrC inactive, randomization, block size 10, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:46<00:00,  1.68s/block]
TRAJECTORY COMPLETED
NtrC inactive, randomization, block size 10, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00,  3.75s/block]
TRAJECTORY COMPLETED
NtrC active, randomization, block size 100, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:28<00:00,  3.16s/block]
TRAJECTORY COMPLETED
NtrC active, randomization, block size 100, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:31<00:00,  3.49s/block]
TRAJECTORY COMPLETED
NtrC active, randomization, block size 100, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00,  2.20s/block]
TRAJECTORY COMPLETED
NtrC active, randomization, block size 50, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:42<00:00,  2.22s/block]
TRAJECTORY COMPLETED
NtrC active, randomization, block size 50, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 19/19 [00:41<00:00,  2.20s/block]
TRAJECTORY COMPLETED
NtrC active, randomization, block size 50, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:45<00:00,  1.67s/block]
TRAJECTORY COMPLETED
NtrC active, randomization, block size 10, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:51<00:00,  1.73s/block]
TRAJECTORY COMPLETED
NtrC active, randomization, block size 10, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 99/99 [02:54<00:00,  1.76s/block]
TRAJECTORY COMPLETED
NtrC active, randomization, block size 10, repl 3 finished

Eigenvector decomposition¶

The eigenvector decomposition of the obtained MI matrices can be used to asses convergence. The main motions of well converged simulations should have relatively high eigenvector overlap (>0.3).

Overlap between replicates can be used as a way to asses relaiability of the results. (Higher convergences = Higher confidences)

In [8]:
# Do an eigenvector decomposition of the matrices
from tqdm import tqdm

print("Do an eigenvector decomposition of the matrices")

for mi_tr in tqdm(mi_ntrc_inactive_100_1_norand, desc="Eigenvector decomposition for Ntrc 100 norand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_100_2_norand, desc="Eigenvector decomposition for Ntrc 100 norand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_100_3_norand, desc="Eigenvector decomposition for Ntrc 100 norand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_inactive_50_1_norand, desc="Eigenvector decomposition for Ntrc 50 norand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_50_2_norand, desc="Eigenvector decomposition for Ntrc 50 norand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_50_3_norand, desc="Eigenvector decomposition for Ntrc 50 norand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_inactive_10_1_norand, desc="Eigenvector decomposition for Ntrc 10 norand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_10_2_norand, desc="Eigenvector decomposition for Ntrc 10 norand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_10_3_norand, desc="Eigenvector decomposition for Ntrc 10 norand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()
    
for mi_tr in tqdm(mi_ntrc_inactive_100_1, desc="Eigenvector decomposition for Ntrc 100 rand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_100_2, desc="Eigenvector decomposition for Ntrc 100 rand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_100_3, desc="Eigenvector decomposition for Ntrc 100 rand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_inactive_50_1, desc="Eigenvector decomposition for Ntrc 50 rand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_50_2, desc="Eigenvector decomposition for Ntrc 50 rand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_50_3, desc="Eigenvector decomposition for Ntrc 50 rand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_inactive_10_1, desc="Eigenvector decomposition for Ntrc 10 rand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_10_2, desc="Eigenvector decomposition for Ntrc 10 rand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_10_3, desc="Eigenvector decomposition for Ntrc 10 rand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_active_100_1_norand, desc="Eigenvector decomposition for Ntrc 100 act norand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_100_2_norand, desc="Eigenvector decomposition for Ntrc 100 act norand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_100_3_norand, desc="Eigenvector decomposition for Ntrc 100 act norand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_active_50_1_norand, desc="Eigenvector decomposition for Ntrc 50 act norand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_50_2_norand, desc="Eigenvector decomposition for Ntrc 50 act norand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_50_3_norand, desc="Eigenvector decomposition for Ntrc 50 act norand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_active_10_1_norand, desc="Eigenvector decomposition for Ntrc 10 act norand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_10_2_norand, desc="Eigenvector decomposition for Ntrc 10 act norand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_10_3_norand, desc="Eigenvector decomposition for Ntrc act 10 norand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()
    
for mi_tr in tqdm(mi_ntrc_active_100_1, desc="Eigenvector decomposition for Ntrc act 100 rand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_100_2, desc="Eigenvector decomposition for Ntrc act 100 rand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_100_3, desc="Eigenvector decomposition for Ntrc act 100 rand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_active_50_1, desc="Eigenvector decomposition for Ntrc act 50 rand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_50_2, desc="Eigenvector decomposition for Ntrc act 50 rand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_50_3, desc="Eigenvector decomposition for Ntrc act 50 rand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_active_10_1, desc="Eigenvector decomposition for Ntrc act 10 rand rep1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_10_2, desc="Eigenvector decomposition for Ntrc act 10 rand rep2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_10_3, desc="Eigenvector decomposition for Ntrc act 10 rand rep3", unit="matrix"):
    mi_tr.compute_eigensystem()
Do an eigenvector decomposition of the matrices
Eigenvector decomposition for Ntrc 100 norand rep1:   0%|          | 0/9 [00:00<?, ?matrix/s]Eigenvector decomposition for Ntrc 100 norand rep1: 100%|██████████| 9/9 [00:00<00:00, 54.28matrix/s]
Eigenvector decomposition for Ntrc 100 norand rep2: 100%|██████████| 9/9 [00:00<00:00, 105.99matrix/s]
Eigenvector decomposition for Ntrc 100 norand rep3: 100%|██████████| 9/9 [00:00<00:00, 105.23matrix/s]
Eigenvector decomposition for Ntrc 50 norand rep1: 100%|██████████| 19/19 [00:00<00:00, 35.71matrix/s]
Eigenvector decomposition for Ntrc 50 norand rep2: 100%|██████████| 19/19 [00:00<00:00, 196.42matrix/s]
Eigenvector decomposition for Ntrc 50 norand rep3: 100%|██████████| 19/19 [00:00<00:00, 214.76matrix/s]
Eigenvector decomposition for Ntrc 10 norand rep1: 100%|██████████| 99/99 [00:00<00:00, 239.07matrix/s]
Eigenvector decomposition for Ntrc 10 norand rep2: 100%|██████████| 99/99 [00:00<00:00, 364.18matrix/s]
Eigenvector decomposition for Ntrc 10 norand rep3: 100%|██████████| 99/99 [00:00<00:00, 487.15matrix/s]
Eigenvector decomposition for Ntrc 100 rand rep1: 100%|██████████| 9/9 [00:00<00:00, 363.07matrix/s]
Eigenvector decomposition for Ntrc 100 rand rep2: 100%|██████████| 9/9 [00:00<00:00, 324.89matrix/s]
Eigenvector decomposition for Ntrc 100 rand rep3: 100%|██████████| 9/9 [00:00<00:00, 304.61matrix/s]
Eigenvector decomposition for Ntrc 50 rand rep1: 100%|██████████| 19/19 [00:00<00:00, 676.68matrix/s]
Eigenvector decomposition for Ntrc 50 rand rep2: 100%|██████████| 19/19 [00:00<00:00, 627.87matrix/s]
Eigenvector decomposition for Ntrc 50 rand rep3: 100%|██████████| 19/19 [00:00<00:00, 681.67matrix/s]
Eigenvector decomposition for Ntrc 10 rand rep1: 100%|██████████| 99/99 [00:00<00:00, 541.86matrix/s]
Eigenvector decomposition for Ntrc 10 rand rep2: 100%|██████████| 99/99 [00:00<00:00, 679.07matrix/s]
Eigenvector decomposition for Ntrc 10 rand rep3: 100%|██████████| 99/99 [00:00<00:00, 745.75matrix/s]
Eigenvector decomposition for Ntrc 100 act norand rep1: 100%|██████████| 9/9 [00:00<00:00, 623.61matrix/s]
Eigenvector decomposition for Ntrc 100 act norand rep2: 100%|██████████| 9/9 [00:00<00:00, 730.59matrix/s]
Eigenvector decomposition for Ntrc 100 act norand rep3: 100%|██████████| 9/9 [00:00<00:00, 326.91matrix/s]
Eigenvector decomposition for Ntrc 50 act norand rep1: 100%|██████████| 19/19 [00:00<00:00, 384.88matrix/s]
Eigenvector decomposition for Ntrc 50 act norand rep2: 100%|██████████| 19/19 [00:00<00:00, 344.99matrix/s]
Eigenvector decomposition for Ntrc 50 act norand rep3: 100%|██████████| 19/19 [00:00<00:00, 496.98matrix/s]
Eigenvector decomposition for Ntrc 10 act norand rep1: 100%|██████████| 99/99 [00:00<00:00, 886.82matrix/s]
Eigenvector decomposition for Ntrc 10 act norand rep2: 100%|██████████| 99/99 [00:00<00:00, 661.50matrix/s]
Eigenvector decomposition for Ntrc act 10 norand rep3: 100%|██████████| 99/99 [00:00<00:00, 569.02matrix/s]
Eigenvector decomposition for Ntrc act 100 rand rep1: 100%|██████████| 9/9 [00:00<00:00, 462.83matrix/s]
Eigenvector decomposition for Ntrc act 100 rand rep2: 100%|██████████| 9/9 [00:00<00:00, 393.17matrix/s]
Eigenvector decomposition for Ntrc act 100 rand rep3: 100%|██████████| 9/9 [00:00<00:00, 401.86matrix/s]
Eigenvector decomposition for Ntrc act 50 rand rep1: 100%|██████████| 19/19 [00:00<00:00, 314.98matrix/s]
Eigenvector decomposition for Ntrc act 50 rand rep2: 100%|██████████| 19/19 [00:00<00:00, 795.24matrix/s]
Eigenvector decomposition for Ntrc act 50 rand rep3: 100%|██████████| 19/19 [00:00<00:00, 809.10matrix/s]
Eigenvector decomposition for Ntrc act 10 rand rep1: 100%|██████████| 99/99 [00:00<00:00, 632.15matrix/s]
Eigenvector decomposition for Ntrc act 10 rand rep2: 100%|██████████| 99/99 [00:00<00:00, 891.42matrix/s]
Eigenvector decomposition for Ntrc act 10 rand rep3: 100%|██████████| 99/99 [00:00<00:00, 537.66matrix/s]

Convergence analysis¶

Overlap can be now computed using the Overlap object.

In this analysis we are using the top 3 eigenvectors which should explain most of the observed variability.

In [9]:
# Create the overlap handler to compute similarities between the trajectories

# Random 100
overlap_100 = Overlap.Overlap([mi_ntrc_inactive_100_1, mi_ntrc_inactive_100_2, mi_ntrc_inactive_100_3,
                            mi_ntrc_active_100_1,  mi_ntrc_active_100_2, mi_ntrc_active_100_3], ev_list=[0,1,2])
overlap_100.fill_overlap_matrix()

# No Random 100
overlap_100_norand = Overlap.Overlap([mi_ntrc_inactive_100_1_norand, mi_ntrc_inactive_100_2_norand, mi_ntrc_inactive_100_3_norand,
                            mi_ntrc_active_100_1_norand,  mi_ntrc_active_100_2_norand, mi_ntrc_active_100_3_norand], ev_list=[0,1,2])
overlap_100_norand.fill_overlap_matrix()

# Random 50
overlap_50 = Overlap.Overlap([mi_ntrc_inactive_50_1, mi_ntrc_inactive_50_2, mi_ntrc_inactive_50_3,
                            mi_ntrc_active_50_1,  mi_ntrc_active_50_2, mi_ntrc_active_50_3], ev_list=[0,1,2])
overlap_50.fill_overlap_matrix()

# No Random 50
overlap_50_norand = Overlap.Overlap([mi_ntrc_inactive_50_1_norand, mi_ntrc_inactive_50_2_norand, mi_ntrc_inactive_50_3_norand,
                            mi_ntrc_active_50_1_norand,  mi_ntrc_active_50_2_norand, mi_ntrc_active_50_3_norand], ev_list=[0,1,2])
overlap_50_norand.fill_overlap_matrix()

# Random 10
overlap_10 = Overlap.Overlap([mi_ntrc_inactive_10_1, mi_ntrc_inactive_10_2, mi_ntrc_inactive_10_3,
                            mi_ntrc_active_10_1,  mi_ntrc_active_10_2, mi_ntrc_active_10_3], ev_list=[0,1,2])
overlap_10.fill_overlap_matrix()

# No Random 10
overlap_10_norand = Overlap.Overlap([mi_ntrc_inactive_10_1_norand, mi_ntrc_inactive_10_2_norand, mi_ntrc_inactive_10_3_norand,
                            mi_ntrc_active_10_1_norand,  mi_ntrc_active_10_2_norand, mi_ntrc_active_10_3_norand], ev_list=[0,1,2])
overlap_10_norand.fill_overlap_matrix()

The results can now be plotted to examine the convergence

In [10]:
# Overlap for block 100 randomized
Allohub_plots.plot_overlap(overlap_100.get_overlap_matrix(), vmax=0.4, action="show")
In [11]:
# Overlap for block 100 no randomized
Allohub_plots.plot_overlap(overlap_100_norand.get_overlap_matrix(), vmax=0.4, action="show")
In [12]:
# Overlap for block 50 randomized
Allohub_plots.plot_overlap(overlap_50.get_overlap_matrix(), vmax=0.4, action="show")
In [13]:
# Overlap for block 50 no randomized
Allohub_plots.plot_overlap(overlap_50_norand.get_overlap_matrix(), vmax=0.4, action="show")
In [14]:
# Overlap for block 10 randomized
Allohub_plots.plot_overlap(overlap_10.get_overlap_matrix(), vmax=0.4, action="show")
In [15]:
# Overlap for block 10 no randomized
Allohub_plots.plot_overlap(overlap_10_norand.get_overlap_matrix(), vmax=0.4, action="show")
In [16]:
# Compute similarities for the 100 block size encodings
similarity_matrix_100_norand = overlap_100_norand.compute_similarities()
similarity_matrix_100 = overlap_100.compute_similarities()
similarity_matrix_50_norand = overlap_50_norand.compute_similarities()
similarity_matrix_50 = overlap_50.compute_similarities()
similarity_matrix_10_norand = overlap_10_norand.compute_similarities()
similarity_matrix_10 = overlap_10.compute_similarities()
SIMILARITIES BETWEEN TRAJECTORY 0  and 1
	Overlap between trajectories: 0.1965
	Overlap of trajectory 1 with itself: 0.3076
	Overlap of trajectory 2 with itself: 0.3663
 Similary 0.8596
SIMILARITIES BETWEEN TRAJECTORY 0  and 2
	Overlap between trajectories: 0.1925
	Overlap of trajectory 1 with itself: 0.3076
	Overlap of trajectory 2 with itself: 0.2977
 Similary 0.8898
SIMILARITIES BETWEEN TRAJECTORY 0  and 3
	Overlap between trajectories: 0.1557
	Overlap of trajectory 1 with itself: 0.3076
	Overlap of trajectory 2 with itself: 0.3256
 Similary 0.8391
SIMILARITIES BETWEEN TRAJECTORY 0  and 4
	Overlap between trajectories: 0.1438
	Overlap of trajectory 1 with itself: 0.3076
	Overlap of trajectory 2 with itself: 0.3039
 Similary 0.8381
SIMILARITIES BETWEEN TRAJECTORY 0  and 5
	Overlap between trajectories: 0.1421
	Overlap of trajectory 1 with itself: 0.3076
	Overlap of trajectory 2 with itself: 0.2909
 Similary 0.8428
SIMILARITIES BETWEEN TRAJECTORY 1  and 2
	Overlap between trajectories: 0.1934
	Overlap of trajectory 1 with itself: 0.3663
	Overlap of trajectory 2 with itself: 0.2977
 Similary 0.8614
SIMILARITIES BETWEEN TRAJECTORY 1  and 3
	Overlap between trajectories: 0.1499
	Overlap of trajectory 1 with itself: 0.3663
	Overlap of trajectory 2 with itself: 0.3256
 Similary 0.804
SIMILARITIES BETWEEN TRAJECTORY 1  and 4
	Overlap between trajectories: 0.1562
	Overlap of trajectory 1 with itself: 0.3663
	Overlap of trajectory 2 with itself: 0.3039
 Similary 0.8212
SIMILARITIES BETWEEN TRAJECTORY 1  and 5
	Overlap between trajectories: 0.17
	Overlap of trajectory 1 with itself: 0.3663
	Overlap of trajectory 2 with itself: 0.2909
 Similary 0.8414
SIMILARITIES BETWEEN TRAJECTORY 2  and 3
	Overlap between trajectories: 0.1575
	Overlap of trajectory 1 with itself: 0.2977
	Overlap of trajectory 2 with itself: 0.3256
 Similary 0.8458
SIMILARITIES BETWEEN TRAJECTORY 2  and 4
	Overlap between trajectories: 0.1559
	Overlap of trajectory 1 with itself: 0.2977
	Overlap of trajectory 2 with itself: 0.3039
 Similary 0.8551
SIMILARITIES BETWEEN TRAJECTORY 2  and 5
	Overlap between trajectories: 0.1455
	Overlap of trajectory 1 with itself: 0.2977
	Overlap of trajectory 2 with itself: 0.2909
 Similary 0.8512
SIMILARITIES BETWEEN TRAJECTORY 3  and 4
	Overlap between trajectories: 0.1514
	Overlap of trajectory 1 with itself: 0.3256
	Overlap of trajectory 2 with itself: 0.3039
 Similary 0.8367
SIMILARITIES BETWEEN TRAJECTORY 3  and 5
	Overlap between trajectories: 0.1412
	Overlap of trajectory 1 with itself: 0.3256
	Overlap of trajectory 2 with itself: 0.2909
 Similary 0.8329
SIMILARITIES BETWEEN TRAJECTORY 4  and 5
	Overlap between trajectories: 0.1612
	Overlap of trajectory 1 with itself: 0.3039
	Overlap of trajectory 2 with itself: 0.2909
 Similary 0.8638
SIMILARITIES BETWEEN TRAJECTORY 0  and 1
	Overlap between trajectories: 0.2601
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.5491
 Similary 0.7317
SIMILARITIES BETWEEN TRAJECTORY 0  and 2
	Overlap between trajectories: 0.2776
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.4596
 Similary 0.7939
SIMILARITIES BETWEEN TRAJECTORY 0  and 3
	Overlap between trajectories: 0.2513
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.5566
 Similary 0.7191
SIMILARITIES BETWEEN TRAJECTORY 0  and 4
	Overlap between trajectories: 0.2028
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.5372
 Similary 0.6803
SIMILARITIES BETWEEN TRAJECTORY 0  and 5
	Overlap between trajectories: 0.2106
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.6988
SIMILARITIES BETWEEN TRAJECTORY 1  and 2
	Overlap between trajectories: 0.2559
	Overlap of trajectory 1 with itself: 0.5491
	Overlap of trajectory 2 with itself: 0.4596
 Similary 0.7515
SIMILARITIES BETWEEN TRAJECTORY 1  and 3
	Overlap between trajectories: 0.1944
	Overlap of trajectory 1 with itself: 0.5491
	Overlap of trajectory 2 with itself: 0.5566
 Similary 0.6416
SIMILARITIES BETWEEN TRAJECTORY 1  and 4
	Overlap between trajectories: 0.2403
	Overlap of trajectory 1 with itself: 0.5491
	Overlap of trajectory 2 with itself: 0.5372
 Similary 0.6971
SIMILARITIES BETWEEN TRAJECTORY 1  and 5
	Overlap between trajectories: 0.2074
	Overlap of trajectory 1 with itself: 0.5491
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.6749
SIMILARITIES BETWEEN TRAJECTORY 2  and 3
	Overlap between trajectories: 0.2186
	Overlap of trajectory 1 with itself: 0.4596
	Overlap of trajectory 2 with itself: 0.5566
 Similary 0.7105
SIMILARITIES BETWEEN TRAJECTORY 2  and 4
	Overlap between trajectories: 0.199
	Overlap of trajectory 1 with itself: 0.4596
	Overlap of trajectory 2 with itself: 0.5372
 Similary 0.7006
SIMILARITIES BETWEEN TRAJECTORY 2  and 5
	Overlap between trajectories: 0.1805
	Overlap of trajectory 1 with itself: 0.4596
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.6928
SIMILARITIES BETWEEN TRAJECTORY 3  and 4
	Overlap between trajectories: 0.2327
	Overlap of trajectory 1 with itself: 0.5566
	Overlap of trajectory 2 with itself: 0.5372
 Similary 0.6858
SIMILARITIES BETWEEN TRAJECTORY 3  and 5
	Overlap between trajectories: 0.2145
	Overlap of trajectory 1 with itself: 0.5566
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.6784
SIMILARITIES BETWEEN TRAJECTORY 4  and 5
	Overlap between trajectories: 0.2812
	Overlap of trajectory 1 with itself: 0.5372
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.7547
SIMILARITIES BETWEEN TRAJECTORY 0  and 1
	Overlap between trajectories: 0.1842
	Overlap of trajectory 1 with itself: 0.2578
	Overlap of trajectory 2 with itself: 0.2693
 Similary 0.9206
SIMILARITIES BETWEEN TRAJECTORY 0  and 2
	Overlap between trajectories: 0.195
	Overlap of trajectory 1 with itself: 0.2578
	Overlap of trajectory 2 with itself: 0.2529
 Similary 0.9396
SIMILARITIES BETWEEN TRAJECTORY 0  and 3
	Overlap between trajectories: 0.1604
	Overlap of trajectory 1 with itself: 0.2578
	Overlap of trajectory 2 with itself: 0.2626
 Similary 0.9002
SIMILARITIES BETWEEN TRAJECTORY 0  and 4
	Overlap between trajectories: 0.1498
	Overlap of trajectory 1 with itself: 0.2578
	Overlap of trajectory 2 with itself: 0.2613
 Similary 0.8902
SIMILARITIES BETWEEN TRAJECTORY 0  and 5
	Overlap between trajectories: 0.1539
	Overlap of trajectory 1 with itself: 0.2578
	Overlap of trajectory 2 with itself: 0.2416
 Similary 0.9042
SIMILARITIES BETWEEN TRAJECTORY 1  and 2
	Overlap between trajectories: 0.1809
	Overlap of trajectory 1 with itself: 0.2693
	Overlap of trajectory 2 with itself: 0.2529
 Similary 0.9198
SIMILARITIES BETWEEN TRAJECTORY 1  and 3
	Overlap between trajectories: 0.1513
	Overlap of trajectory 1 with itself: 0.2693
	Overlap of trajectory 2 with itself: 0.2626
 Similary 0.8854
SIMILARITIES BETWEEN TRAJECTORY 1  and 4
	Overlap between trajectories: 0.1518
	Overlap of trajectory 1 with itself: 0.2693
	Overlap of trajectory 2 with itself: 0.2613
 Similary 0.8864
SIMILARITIES BETWEEN TRAJECTORY 1  and 5
	Overlap between trajectories: 0.1625
	Overlap of trajectory 1 with itself: 0.2693
	Overlap of trajectory 2 with itself: 0.2416
 Similary 0.9071
SIMILARITIES BETWEEN TRAJECTORY 2  and 3
	Overlap between trajectories: 0.1596
	Overlap of trajectory 1 with itself: 0.2529
	Overlap of trajectory 2 with itself: 0.2626
 Similary 0.9018
SIMILARITIES BETWEEN TRAJECTORY 2  and 4
	Overlap between trajectories: 0.156
	Overlap of trajectory 1 with itself: 0.2529
	Overlap of trajectory 2 with itself: 0.2613
 Similary 0.8989
SIMILARITIES BETWEEN TRAJECTORY 2  and 5
	Overlap between trajectories: 0.1534
	Overlap of trajectory 1 with itself: 0.2529
	Overlap of trajectory 2 with itself: 0.2416
 Similary 0.9061
SIMILARITIES BETWEEN TRAJECTORY 3  and 4
	Overlap between trajectories: 0.1476
	Overlap of trajectory 1 with itself: 0.2626
	Overlap of trajectory 2 with itself: 0.2613
 Similary 0.8856
SIMILARITIES BETWEEN TRAJECTORY 3  and 5
	Overlap between trajectories: 0.1423
	Overlap of trajectory 1 with itself: 0.2626
	Overlap of trajectory 2 with itself: 0.2416
 Similary 0.8902
SIMILARITIES BETWEEN TRAJECTORY 4  and 5
	Overlap between trajectories: 0.1628
	Overlap of trajectory 1 with itself: 0.2613
	Overlap of trajectory 2 with itself: 0.2416
 Similary 0.9114
SIMILARITIES BETWEEN TRAJECTORY 0  and 1
	Overlap between trajectories: 0.2513
	Overlap of trajectory 1 with itself: 0.4236
	Overlap of trajectory 2 with itself: 0.4495
 Similary 0.8147
SIMILARITIES BETWEEN TRAJECTORY 0  and 2
	Overlap between trajectories: 0.281
	Overlap of trajectory 1 with itself: 0.4236
	Overlap of trajectory 2 with itself: 0.4026
 Similary 0.8679
SIMILARITIES BETWEEN TRAJECTORY 0  and 3
	Overlap between trajectories: 0.2443
	Overlap of trajectory 1 with itself: 0.4236
	Overlap of trajectory 2 with itself: 0.4281
 Similary 0.8184
SIMILARITIES BETWEEN TRAJECTORY 0  and 4
	Overlap between trajectories: 0.2012
	Overlap of trajectory 1 with itself: 0.4236
	Overlap of trajectory 2 with itself: 0.401
 Similary 0.7889
SIMILARITIES BETWEEN TRAJECTORY 0  and 5
	Overlap between trajectories: 0.222
	Overlap of trajectory 1 with itself: 0.4236
	Overlap of trajectory 2 with itself: 0.4349
 Similary 0.7927
SIMILARITIES BETWEEN TRAJECTORY 1  and 2
	Overlap between trajectories: 0.2615
	Overlap of trajectory 1 with itself: 0.4495
	Overlap of trajectory 2 with itself: 0.4026
 Similary 0.8355
SIMILARITIES BETWEEN TRAJECTORY 1  and 3
	Overlap between trajectories: 0.199
	Overlap of trajectory 1 with itself: 0.4495
	Overlap of trajectory 2 with itself: 0.4281
 Similary 0.7602
SIMILARITIES BETWEEN TRAJECTORY 1  and 4
	Overlap between trajectories: 0.2272
	Overlap of trajectory 1 with itself: 0.4495
	Overlap of trajectory 2 with itself: 0.401
 Similary 0.8019
SIMILARITIES BETWEEN TRAJECTORY 1  and 5
	Overlap between trajectories: 0.2189
	Overlap of trajectory 1 with itself: 0.4495
	Overlap of trajectory 2 with itself: 0.4349
 Similary 0.7767
SIMILARITIES BETWEEN TRAJECTORY 2  and 3
	Overlap between trajectories: 0.222
	Overlap of trajectory 1 with itself: 0.4026
	Overlap of trajectory 2 with itself: 0.4281
 Similary 0.8067
SIMILARITIES BETWEEN TRAJECTORY 2  and 4
	Overlap between trajectories: 0.2038
	Overlap of trajectory 1 with itself: 0.4026
	Overlap of trajectory 2 with itself: 0.401
 Similary 0.802
SIMILARITIES BETWEEN TRAJECTORY 2  and 5
	Overlap between trajectories: 0.1988
	Overlap of trajectory 1 with itself: 0.4026
	Overlap of trajectory 2 with itself: 0.4349
 Similary 0.7801
SIMILARITIES BETWEEN TRAJECTORY 3  and 4
	Overlap between trajectories: 0.2175
	Overlap of trajectory 1 with itself: 0.4281
	Overlap of trajectory 2 with itself: 0.401
 Similary 0.8029
SIMILARITIES BETWEEN TRAJECTORY 3  and 5
	Overlap between trajectories: 0.2072
	Overlap of trajectory 1 with itself: 0.4281
	Overlap of trajectory 2 with itself: 0.4349
 Similary 0.7757
SIMILARITIES BETWEEN TRAJECTORY 4  and 5
	Overlap between trajectories: 0.2628
	Overlap of trajectory 1 with itself: 0.401
	Overlap of trajectory 2 with itself: 0.4349
 Similary 0.8448
SIMILARITIES BETWEEN TRAJECTORY 0  and 1
	Overlap between trajectories: 0.2118
	Overlap of trajectory 1 with itself: 0.2575
	Overlap of trajectory 2 with itself: 0.2411
 Similary 0.9625
SIMILARITIES BETWEEN TRAJECTORY 0  and 2
	Overlap between trajectories: 0.2274
	Overlap of trajectory 1 with itself: 0.2575
	Overlap of trajectory 2 with itself: 0.2545
 Similary 0.9713
SIMILARITIES BETWEEN TRAJECTORY 0  and 3
	Overlap between trajectories: 0.2015
	Overlap of trajectory 1 with itself: 0.2575
	Overlap of trajectory 2 with itself: 0.2576
 Similary 0.944
SIMILARITIES BETWEEN TRAJECTORY 0  and 4
	Overlap between trajectories: 0.1935
	Overlap of trajectory 1 with itself: 0.2575
	Overlap of trajectory 2 with itself: 0.2667
 Similary 0.9314
SIMILARITIES BETWEEN TRAJECTORY 0  and 5
	Overlap between trajectories: 0.2123
	Overlap of trajectory 1 with itself: 0.2575
	Overlap of trajectory 2 with itself: 0.2584
 Similary 0.9543
SIMILARITIES BETWEEN TRAJECTORY 1  and 2
	Overlap between trajectories: 0.2195
	Overlap of trajectory 1 with itself: 0.2411
	Overlap of trajectory 2 with itself: 0.2545
 Similary 0.9717
SIMILARITIES BETWEEN TRAJECTORY 1  and 3
	Overlap between trajectories: 0.1934
	Overlap of trajectory 1 with itself: 0.2411
	Overlap of trajectory 2 with itself: 0.2576
 Similary 0.944
SIMILARITIES BETWEEN TRAJECTORY 1  and 4
	Overlap between trajectories: 0.1972
	Overlap of trajectory 1 with itself: 0.2411
	Overlap of trajectory 2 with itself: 0.2667
 Similary 0.9433
SIMILARITIES BETWEEN TRAJECTORY 1  and 5
	Overlap between trajectories: 0.205
	Overlap of trajectory 1 with itself: 0.2411
	Overlap of trajectory 2 with itself: 0.2584
 Similary 0.9553
SIMILARITIES BETWEEN TRAJECTORY 2  and 3
	Overlap between trajectories: 0.2047
	Overlap of trajectory 1 with itself: 0.2545
	Overlap of trajectory 2 with itself: 0.2576
 Similary 0.9486
SIMILARITIES BETWEEN TRAJECTORY 2  and 4
	Overlap between trajectories: 0.1986
	Overlap of trajectory 1 with itself: 0.2545
	Overlap of trajectory 2 with itself: 0.2667
 Similary 0.9379
SIMILARITIES BETWEEN TRAJECTORY 2  and 5
	Overlap between trajectories: 0.2131
	Overlap of trajectory 1 with itself: 0.2545
	Overlap of trajectory 2 with itself: 0.2584
 Similary 0.9566
SIMILARITIES BETWEEN TRAJECTORY 3  and 4
	Overlap between trajectories: 0.1956
	Overlap of trajectory 1 with itself: 0.2576
	Overlap of trajectory 2 with itself: 0.2667
 Similary 0.9335
SIMILARITIES BETWEEN TRAJECTORY 3  and 5
	Overlap between trajectories: 0.2054
	Overlap of trajectory 1 with itself: 0.2576
	Overlap of trajectory 2 with itself: 0.2584
 Similary 0.9474
SIMILARITIES BETWEEN TRAJECTORY 4  and 5
	Overlap between trajectories: 0.2093
	Overlap of trajectory 1 with itself: 0.2667
	Overlap of trajectory 2 with itself: 0.2584
 Similary 0.9467
SIMILARITIES BETWEEN TRAJECTORY 0  and 1
	Overlap between trajectories: 0.2709
	Overlap of trajectory 1 with itself: 0.3793
	Overlap of trajectory 2 with itself: 0.3618
 Similary 0.9003
SIMILARITIES BETWEEN TRAJECTORY 0  and 2
	Overlap between trajectories: 0.2967
	Overlap of trajectory 1 with itself: 0.3793
	Overlap of trajectory 2 with itself: 0.362
 Similary 0.9261
SIMILARITIES BETWEEN TRAJECTORY 0  and 3
	Overlap between trajectories: 0.2572
	Overlap of trajectory 1 with itself: 0.3793
	Overlap of trajectory 2 with itself: 0.3568
 Similary 0.8892
SIMILARITIES BETWEEN TRAJECTORY 0  and 4
	Overlap between trajectories: 0.2287
	Overlap of trajectory 1 with itself: 0.3793
	Overlap of trajectory 2 with itself: 0.3767
 Similary 0.8507
SIMILARITIES BETWEEN TRAJECTORY 0  and 5
	Overlap between trajectories: 0.2411
	Overlap of trajectory 1 with itself: 0.3793
	Overlap of trajectory 2 with itself: 0.3382
 Similary 0.8824
SIMILARITIES BETWEEN TRAJECTORY 1  and 2
	Overlap between trajectories: 0.2855
	Overlap of trajectory 1 with itself: 0.3618
	Overlap of trajectory 2 with itself: 0.362
 Similary 0.9236
SIMILARITIES BETWEEN TRAJECTORY 1  and 3
	Overlap between trajectories: 0.2247
	Overlap of trajectory 1 with itself: 0.3618
	Overlap of trajectory 2 with itself: 0.3568
 Similary 0.8654
SIMILARITIES BETWEEN TRAJECTORY 1  and 4
	Overlap between trajectories: 0.2393
	Overlap of trajectory 1 with itself: 0.3618
	Overlap of trajectory 2 with itself: 0.3767
 Similary 0.8701
SIMILARITIES BETWEEN TRAJECTORY 1  and 5
	Overlap between trajectories: 0.2309
	Overlap of trajectory 1 with itself: 0.3618
	Overlap of trajectory 2 with itself: 0.3382
 Similary 0.8809
SIMILARITIES BETWEEN TRAJECTORY 2  and 3
	Overlap between trajectories: 0.2481
	Overlap of trajectory 1 with itself: 0.362
	Overlap of trajectory 2 with itself: 0.3568
 Similary 0.8887
SIMILARITIES BETWEEN TRAJECTORY 2  and 4
	Overlap between trajectories: 0.225
	Overlap of trajectory 1 with itself: 0.362
	Overlap of trajectory 2 with itself: 0.3767
 Similary 0.8557
SIMILARITIES BETWEEN TRAJECTORY 2  and 5
	Overlap between trajectories: 0.2332
	Overlap of trajectory 1 with itself: 0.362
	Overlap of trajectory 2 with itself: 0.3382
 Similary 0.8831
SIMILARITIES BETWEEN TRAJECTORY 3  and 4
	Overlap between trajectories: 0.2339
	Overlap of trajectory 1 with itself: 0.3568
	Overlap of trajectory 2 with itself: 0.3767
 Similary 0.8672
SIMILARITIES BETWEEN TRAJECTORY 3  and 5
	Overlap between trajectories: 0.2303
	Overlap of trajectory 1 with itself: 0.3568
	Overlap of trajectory 2 with itself: 0.3382
 Similary 0.8828
SIMILARITIES BETWEEN TRAJECTORY 4  and 5
	Overlap between trajectories: 0.2609
	Overlap of trajectory 1 with itself: 0.3767
	Overlap of trajectory 2 with itself: 0.3382
 Similary 0.9034
In [17]:
# Statistic significance of convergence for block 100
# The groups are simulation 0,1,2 inactive 3,4,5 active

# No randomization group
within_conditions = [similarity_matrix_100_norand[0][1], similarity_matrix_100_norand[0][2], similarity_matrix_100_norand[1][2], 
                     similarity_matrix_100_norand[3][4], similarity_matrix_100_norand[3][5], similarity_matrix_100_norand[4][5]]

between_conditions = [similarity_matrix_100_norand[0][3], similarity_matrix_100_norand[0][4], similarity_matrix_100_norand[0][5], 
                      similarity_matrix_100_norand[1][3], similarity_matrix_100_norand[1][4], similarity_matrix_100_norand[1][5],
                      similarity_matrix_100_norand[2][3], similarity_matrix_100_norand[2][4], similarity_matrix_100_norand[2][5]]

tat, p_value = ttest_ind(within_conditions, between_conditions, equal_var=False, alternative='greater')

print(f"p-value of convergence within vs between conditions for the non randomization encoded trajectory with block 100 is {p_value}")

within_conditions = [similarity_matrix_50_norand[0][1], similarity_matrix_50_norand[0][2], similarity_matrix_50_norand[1][2], 
                     similarity_matrix_50_norand[3][4], similarity_matrix_50_norand[3][5], similarity_matrix_50_norand[4][5]]

between_conditions = [similarity_matrix_50_norand[0][3], similarity_matrix_50_norand[0][4], similarity_matrix_50_norand[0][5], 
                      similarity_matrix_50_norand[1][3], similarity_matrix_50_norand[1][4], similarity_matrix_50_norand[1][5],
                      similarity_matrix_50_norand[2][3], similarity_matrix_50_norand[2][4], similarity_matrix_50_norand[2][5]]

tat, p_value = ttest_ind(within_conditions, between_conditions, equal_var=False, alternative='greater')

print(f"p-value of convergence within vs between conditions for the non randomization encoded trajectory with block 50 is {p_value}")

within_conditions = [similarity_matrix_10_norand[0][1], similarity_matrix_10_norand[0][2], similarity_matrix_10_norand[1][2], 
                     similarity_matrix_10_norand[3][4], similarity_matrix_10_norand[3][5], similarity_matrix_10_norand[4][5]]

between_conditions = [similarity_matrix_10_norand[0][3], similarity_matrix_10_norand[0][4], similarity_matrix_10_norand[0][5], 
                      similarity_matrix_10_norand[1][3], similarity_matrix_10_norand[1][4], similarity_matrix_10_norand[1][5],
                      similarity_matrix_10_norand[2][3], similarity_matrix_10_norand[2][4], similarity_matrix_10_norand[2][5]]

tat, p_value = ttest_ind(within_conditions, between_conditions, equal_var=False, alternative='greater')

print(f"p-value of convergence within vs between conditions for the non randomization encoded trajectory with block 10 is {p_value}")


# Randomization group
within_conditions = [similarity_matrix_100[0][1], similarity_matrix_100[0][2], similarity_matrix_100[1][2], 
                     similarity_matrix_100[3][4], similarity_matrix_100[3][5], similarity_matrix_100[4][5]]

between_conditions = [similarity_matrix_100[0][3], similarity_matrix_100[0][4], similarity_matrix_100[0][5], 
                      similarity_matrix_100[1][3], similarity_matrix_100[1][4], similarity_matrix_100[1][5],
                      similarity_matrix_100[2][3], similarity_matrix_100[2][4], similarity_matrix_100[2][5]]

tat, p_value = ttest_ind(within_conditions, between_conditions, equal_var=False, alternative='greater')

print(f"p-value of convergence within vs between conditions for the randomized encoded trajectory with block 100 is {p_value}")

within_conditions = [similarity_matrix_50[0][1], similarity_matrix_50[0][2], similarity_matrix_50[1][2], 
                     similarity_matrix_50[3][4], similarity_matrix_50[3][5], similarity_matrix_50[4][5]]

between_conditions = [similarity_matrix_50[0][3], similarity_matrix_50[0][4], similarity_matrix_50[0][5], 
                      similarity_matrix_50[1][3], similarity_matrix_50[1][4], similarity_matrix_50[1][5],
                      similarity_matrix_50[2][3], similarity_matrix_50[2][4], similarity_matrix_50[2][5]]

tat, p_value = ttest_ind(within_conditions, between_conditions, equal_var=False, alternative='greater')

print(f"p-value of convergence within vs between conditions for the randomized encoded trajectory with block 50 is {p_value}")

within_conditions = [similarity_matrix_10[0][1], similarity_matrix_10[0][2], similarity_matrix_10[1][2], 
                     similarity_matrix_10[3][4], similarity_matrix_10[3][5], similarity_matrix_10[4][5]]

between_conditions = [similarity_matrix_10[0][3], similarity_matrix_10[0][4], similarity_matrix_10[0][5], 
                      similarity_matrix_10[1][3], similarity_matrix_10[1][4], similarity_matrix_10[1][5],
                      similarity_matrix_10[2][3], similarity_matrix_10[2][4], similarity_matrix_10[2][5]]

tat, p_value = ttest_ind(within_conditions, between_conditions, equal_var=False, alternative='greater')

print(f"p-value of convergence within vs between conditions for the randomized encoded trajectory with block 10 is {p_value}")
p-value of convergence within vs between conditions for the non randomization encoded trajectory with block 100 is 0.05317949717130676
p-value of convergence within vs between conditions for the non randomization encoded trajectory with block 50 is 0.09275176445640934
p-value of convergence within vs between conditions for the non randomization encoded trajectory with block 10 is 0.05303464738267857
p-value of convergence within vs between conditions for the randomized encoded trajectory with block 100 is 0.004525261989032756
p-value of convergence within vs between conditions for the randomized encoded trajectory with block 50 is 0.02122564681572018
p-value of convergence within vs between conditions for the randomized encoded trajectory with block 10 is 0.03040943603611007
In [18]:
# Create a box plot for the different block size
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# df to hold data
data = {
    "Block Size": [],
    "Group": [],
    "Similarity": []
}

# group that each index belongs to.
group = [0,0,0,1,1,1]

# Mapping of names to similarity matrices
sim_map = {"100": similarity_matrix_100_norand, "50": similarity_matrix_50_norand, "10": similarity_matrix_10_norand,
           "100 rand": similarity_matrix_100, "50 rand": similarity_matrix_50, "10 rand": similarity_matrix_10}

# Format the data into a dict for seaborn
for key in sim_map:
    matrix_to_use = sim_map[key]
    for i in range(6): # 6 simulations
        for j in range(i,6):
            g1 = group[i]
            g2 = group[j]
            if i == j:
                continue
            elif g1 == g2:
                data["Block Size"].append(key)
                data["Group"].append("Within conditions")
                data["Similarity"].append(matrix_to_use[i][j])
            else:
                data["Block Size"].append(key)
                data["Group"].append("Between conditions")
                data["Similarity"].append(matrix_to_use[i][j])

# Convert data to DataFrame
df = pd.DataFrame(data)

# Set Seaborn style
sns.set_theme(style="whitegrid", palette="crest")

# Create the boxplot
plt.figure(figsize=(10, 6))
sns.boxplot( data=df, x="Block Size", y="Similarity", hue="Group")

# Add labels
plt.xlabel("Block Size", fontsize=12)
plt.ylabel("Similarity", fontsize=12)
plt.legend(title="Group")

# Show the plot
plt.show()

The similarity (convergence) for the non randomized at block size 10 is barely significative p-value ~ 0.05 while the randomization of blocks greatly helped the convergence of the signal with p-values < 0.05 for all block sizes.

Up and down regulated fragments detection¶

Finally, we will compare the top hits for the different encoding sizes with randomization and without.

In [19]:
# Find upregulated and downregulated fragments
print("Find upregulated and downregulated fragments for randomized 100")

# 100 block random
updown_regulated_fragments = overlap_100.updown_regulation(traj_mapping=[0,0,0,1,1,1],splitting=True)
# The obtained dictionary has as keys the pairs of conditions. In this case (0,1).
# If more conditions were used one would have all the additional pairing (0,1), (0,2), (1,2) ....
t12_updown_100 = updown_regulated_fragments[(0,1)]

# 100 block no random
updown_regulated_fragments = overlap_100_norand.updown_regulation(traj_mapping=[0,0,0,1,1,1],splitting=True)
t12_updown_100_norand = updown_regulated_fragments[(0,1)]

# 50 block random
updown_regulated_fragments = overlap_50.updown_regulation(traj_mapping=[0,0,0,1,1,1],splitting=True)
t12_updown_50 = updown_regulated_fragments[(0,1)]

# 50 block no random
updown_regulated_fragments = overlap_50_norand.updown_regulation(traj_mapping=[0,0,0,1,1,1],splitting=True)
t12_updown_50_norand = updown_regulated_fragments[(0,1)]

# 10 block random
updown_regulated_fragments = overlap_10.updown_regulation(traj_mapping=[0,0,0,1,1,1],splitting=True)
t12_updown_10 = updown_regulated_fragments[(0,1)]

# 10 block no random
updown_regulated_fragments = overlap_10_norand.updown_regulation(traj_mapping=[0,0,0,1,1,1],splitting=True)
t12_updown_10_norand = updown_regulated_fragments[(0,1)]
Find upregulated and downregulated fragments for randomized 100
     FragmentPairs  log2FoldChange       PValues  AdjustedPValues
0           (0, 1)       -0.522549  1.339345e-02         0.034581
1           (0, 2)       -1.094437  9.572307e-07         0.000008
2           (0, 3)       -0.218183  2.442496e-01         0.361814
3           (0, 4)       -0.760571  5.631604e-03         0.016565
4           (0, 5)       -0.766811  1.257980e-03         0.004601
...            ...             ...           ...              ...
7255    (117, 119)        0.375417  3.133037e-01         0.434994
7256    (117, 120)        0.276478  3.662286e-01         0.487410
7257    (118, 119)        1.172490  2.578103e-02         0.059703
7258    (118, 120)        1.076641  2.037327e-02         0.049140
7259    (119, 120)       -0.398515  1.525957e-02         0.038520

[7260 rows x 4 columns]
     FragmentPairs  log2FoldChange   PValues  AdjustedPValues
0           (0, 1)        0.454542  0.198620         0.430700
1           (0, 2)        0.131890  0.747486         0.870648
2           (0, 3)        0.976979  0.002844         0.025323
3           (0, 4)        0.259348  0.405593         0.626112
4           (0, 5)       -0.106488  0.726527         0.859629
...            ...             ...       ...              ...
7255    (117, 119)        0.020499  0.965923         0.983121
7256    (117, 120)        0.047135  0.918079         0.961104
7257    (118, 119)       -0.334203  0.536086         0.729929
7258    (118, 120)       -0.144257  0.777131         0.886405
7259    (119, 120)       -0.389978  0.228897         0.467190

[7260 rows x 4 columns]
     FragmentPairs  log2FoldChange       PValues  AdjustedPValues
0           (0, 1)       -0.523155  3.096334e-04     9.639531e-04
1           (0, 2)       -0.948512  1.584162e-11     1.390691e-10
2           (0, 3)       -0.135474  3.171945e-01     4.139551e-01
3           (0, 4)       -0.725702  6.263658e-04     1.813887e-03
4           (0, 5)       -0.813870  3.087299e-05     1.154755e-04
...            ...             ...           ...              ...
7255    (117, 119)        0.358255  1.288376e-01     1.975419e-01
7256    (117, 120)        0.352574  9.379584e-02     1.513238e-01
7257    (118, 119)        0.929538  3.485537e-03     8.466041e-03
7258    (118, 120)        0.818631  4.451067e-03     1.055692e-02
7259    (119, 120)       -0.490005  5.990874e-05     2.133092e-04

[7260 rows x 4 columns]
     FragmentPairs  log2FoldChange   PValues  AdjustedPValues
0           (0, 1)        0.289152  0.331329         0.533833
1           (0, 2)        0.237128  0.360665         0.560899
2           (0, 3)        0.776993  0.000375         0.003544
3           (0, 4)        0.250453  0.346289         0.547368
4           (0, 5)       -0.121626  0.580416         0.748459
...            ...             ...       ...              ...
7255    (117, 119)       -0.109493  0.723493         0.846479
7256    (117, 120)        0.103839  0.695979         0.826568
7257    (118, 119)       -0.334476  0.332107         0.534492
7258    (118, 120)        0.072748  0.825344         0.909618
7259    (119, 120)       -0.246504  0.299234         0.502762

[7260 rows x 4 columns]
     FragmentPairs  log2FoldChange       PValues  AdjustedPValues
0           (0, 1)       -0.331704  1.953137e-06     4.495807e-06
1           (0, 2)       -0.508715  5.727635e-13     2.282252e-12
2           (0, 3)        0.044968  6.407765e-01     6.870532e-01
3           (0, 4)       -1.209684  3.350584e-13     1.356678e-12
4           (0, 5)       -1.226485  5.721196e-20     3.452692e-19
...            ...             ...           ...              ...
7255    (117, 119)        0.396949  8.089016e-03     1.255639e-02
7256    (117, 120)        0.187222  1.552596e-01     1.944427e-01
7257    (118, 119)        0.741376  6.284903e-08     1.679367e-07
7258    (118, 120)        0.440293  1.055203e-03     1.838438e-03
7259    (119, 120)       -0.404690  1.789773e-10     5.941359e-10

[7260 rows x 4 columns]
     FragmentPairs  log2FoldChange       PValues  AdjustedPValues
0           (0, 1)       -0.768354  2.569098e-07     1.779744e-06
1           (0, 2)        0.434137  1.023072e-04     4.304868e-04
2           (0, 3)        0.262759  8.779505e-02     1.589218e-01
3           (0, 4)       -1.023850  1.377697e-06     8.362941e-06
4           (0, 5)       -1.127947  2.672290e-08     2.248068e-07
...            ...             ...           ...              ...
7255    (117, 119)       -0.131397  5.982495e-01     7.053087e-01
7256    (117, 120)        0.213482  3.324949e-01     4.564889e-01
7257    (118, 119)       -0.271250  2.447417e-01     3.625433e-01
7258    (118, 120)        0.200357  4.100453e-01     5.348417e-01
7259    (119, 120)       -0.158504  1.459112e-01     2.419721e-01

[7260 rows x 4 columns]

Now we can filter the fragment pairs based on their p value and fold change

In [20]:
# Filtering parameters
pval_threshold = 0.01
fold_change_threshold = 2

# First extract significant fragments
significant_fragments_100 = t12_updown_100[t12_updown_100['AdjustedPValues'] < pval_threshold]
significant_fragments_100_norand = t12_updown_100_norand[t12_updown_100_norand['AdjustedPValues'] < pval_threshold]
significant_fragments_50 = t12_updown_50[t12_updown_50['AdjustedPValues'] < pval_threshold]
significant_fragments_50_norand = t12_updown_50_norand[t12_updown_50_norand['AdjustedPValues'] < pval_threshold]
significant_fragments_10 = t12_updown_10[t12_updown_10['AdjustedPValues'] < pval_threshold]
significant_fragments_10_norand = t12_updown_10_norand[t12_updown_10_norand['AdjustedPValues'] < pval_threshold]
In [21]:
# Second, filter by fold change and print top 5
# 100 random
up_reg_100 = significant_fragments_100[significant_fragments_100['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
print(up_reg_100.head(5))

# 50 random
up_reg_50 = significant_fragments_50[significant_fragments_50['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
print(up_reg_50.head(5))

# 10 random
up_reg_10 = significant_fragments_10[significant_fragments_10['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
print(up_reg_10.head(5))
     FragmentPairs  log2FoldChange       PValues  AdjustedPValues
1776      (15, 97)        8.568563  1.143804e-12     3.533624e-11
2865      (26, 97)        5.910179  5.701885e-21     1.799812e-18
1131       (9, 97)        4.551334  1.376245e-14     6.404831e-13
6801      (90, 97)        4.501422  2.943828e-13     1.037485e-11
1020       (8, 97)        4.158793  3.712417e-10     6.224514e-09
     FragmentPairs  log2FoldChange       PValues  AdjustedPValues
2865      (26, 97)        6.894309  2.244594e-43     3.259151e-40
6692     (86, 114)        5.866300  5.037963e-05     1.819682e-04
1892     (16, 109)        5.579663  2.665850e-05     1.008025e-04
7187    (108, 114)        4.949725  3.119012e-03     7.655182e-03
1131       (9, 97)        4.948381  5.884336e-35     1.256479e-32
     FragmentPairs  log2FoldChange        PValues  AdjustedPValues
1131       (9, 97)        6.427045  2.114277e-161    2.558275e-158
2865      (26, 97)        6.059063  1.470354e-122    5.083224e-120
6587     (83, 114)        4.813141   8.800492e-28     7.782164e-27
1350      (11, 97)        4.611329  3.698651e-159    3.836029e-156
96         (0, 97)        4.547034  1.204831e-222    8.747074e-219

With the randomization approach the three block sizes achieved similar upregulated fragments

In [22]:
# Second, filter by fold change and print top 5
# 100 random
up_reg_100_norand = significant_fragments_100_norand[significant_fragments_100_norand['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
print(up_reg_100.head(5))

# 50 random
up_reg_50_norand = significant_fragments_50_norand[significant_fragments_50_norand['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
print(up_reg_50.head(5))

# 10 random
up_reg_10_norand = significant_fragments_10_norand[significant_fragments_10_norand['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
print(up_reg_10.head(5))
     FragmentPairs  log2FoldChange       PValues  AdjustedPValues
1776      (15, 97)        8.568563  1.143804e-12     3.533624e-11
2865      (26, 97)        5.910179  5.701885e-21     1.799812e-18
1131       (9, 97)        4.551334  1.376245e-14     6.404831e-13
6801      (90, 97)        4.501422  2.943828e-13     1.037485e-11
1020       (8, 97)        4.158793  3.712417e-10     6.224514e-09
     FragmentPairs  log2FoldChange       PValues  AdjustedPValues
2865      (26, 97)        6.894309  2.244594e-43     3.259151e-40
6692     (86, 114)        5.866300  5.037963e-05     1.819682e-04
1892     (16, 109)        5.579663  2.665850e-05     1.008025e-04
7187    (108, 114)        4.949725  3.119012e-03     7.655182e-03
1131       (9, 97)        4.948381  5.884336e-35     1.256479e-32
     FragmentPairs  log2FoldChange        PValues  AdjustedPValues
1131       (9, 97)        6.427045  2.114277e-161    2.558275e-158
2865      (26, 97)        6.059063  1.470354e-122    5.083224e-120
6587     (83, 114)        4.813141   8.800492e-28     7.782164e-27
1350      (11, 97)        4.611329  3.698651e-159    3.836029e-156
96         (0, 97)        4.547034  1.204831e-222    8.747074e-219
In [23]:
# Function to obtain most frequent fragments 
def get_top_fragments(df_reg):
    top_fragments_count = {}

    for fragment_pair in df_reg["FragmentPairs"]:
        top_fragments_count.setdefault(fragment_pair[0], 0) # record first fragment of the pair
        top_fragments_count.setdefault(fragment_pair[1], 0) # record second fragment of the pair
        top_fragments_count[fragment_pair[0]] += 1
        top_fragments_count[fragment_pair[1]] += 1

    # sort based on counts
    top_fragments_count = dict(sorted(top_fragments_count.items(), key=lambda item: item[1], reverse=True))

    return top_fragments_count

# Block 100 randomized

top_fragments_count = get_top_fragments(up_reg_100)
# Print top 5 most appearing fragments
dict_keys = list(top_fragments_count.keys())
print("NtrC 100 block size randomized")
for i in range(5):
    frag = dict_keys[i]
    print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")

# Block 50 randomized

top_fragments_count = get_top_fragments(up_reg_50)
# Print top 5 most appearing fragments
dict_keys = list(top_fragments_count.keys())
print("NtrC 50 block size randomized")
for i in range(5):
    frag = dict_keys[i]
    print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")

# Block 10 randomized

top_fragments_count = get_top_fragments(up_reg_10)
# Print top 5most appearing fragments
dict_keys = list(top_fragments_count.keys())
print("NtrC 10 block size randomized")
for i in range(5):
    frag = dict_keys[i]
    print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")
NtrC 100 block size randomized
Fragment 90 appears 58 times.
Fragment 91 appears 57 times.
Fragment 89 appears 56 times.
Fragment 97 appears 44 times.
Fragment 92 appears 27 times.
NtrC 50 block size randomized
Fragment 91 appears 66 times.
Fragment 89 appears 54 times.
Fragment 90 appears 49 times.
Fragment 97 appears 33 times.
Fragment 92 appears 28 times.
NtrC 10 block size randomized
Fragment 91 appears 47 times.
Fragment 89 appears 32 times.
Fragment 92 appears 29 times.
Fragment 90 appears 27 times.
Fragment 97 appears 22 times.

All block sizes yielded a similar results with the same key fragments appearing on the top hits

In [24]:
# Block 100 non randomized

top_fragments_count = get_top_fragments(up_reg_100_norand)
# Print top 5 most appearing fragments
dict_keys = list(top_fragments_count.keys())
print("NtrC 100 block size non randomized")
for i in range(5):
    frag = dict_keys[i]
    print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")

# Block 50 non randomized

top_fragments_count = get_top_fragments(up_reg_50_norand)
# Print top 5 most appearing fragments
dict_keys = list(top_fragments_count.keys())
print("NtrC 50 block size non randomized")
for i in range(5):
    frag = dict_keys[i]
    print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")

# Block 10 non randomized

top_fragments_count = get_top_fragments(up_reg_10_norand)
# Print top 5 most appearing fragments
dict_keys = list(top_fragments_count.keys())
print("NtrC 10 block size non randomized")
for i in range(5):
    frag = dict_keys[i]
    print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")
NtrC 100 block size non randomized
Fragment 89 appears 20 times.
Fragment 91 appears 20 times.
Fragment 97 appears 19 times.
Fragment 90 appears 17 times.
Fragment 88 appears 12 times.
NtrC 50 block size non randomized
Fragment 97 appears 15 times.
Fragment 91 appears 9 times.
Fragment 90 appears 7 times.
Fragment 89 appears 6 times.
Fragment 96 appears 6 times.
NtrC 10 block size non randomized
Fragment 83 appears 22 times.
Fragment 60 appears 19 times.
Fragment 84 appears 16 times.
Fragment 58 appears 9 times.
Fragment 62 appears 9 times.

Still a similar signal can be recovered for the top appearing fragments, given a long enough trajectory.

Conclusions¶

In this section, we have examined how to directly encode MD trajectories into structural alphabets and observed the improvements on consistency and reproducibility added by the addition of randomized encoding blocks.

Comparison of Alphabets¶

On this next section the structural alphabets of 3DI and M32k25 will be compared.

We will use a block size of 100 and randomized blocks for both alphabets. This section will recompute the whole process with the exception of the step to convert the MD into structural alphabets, but those files are already precomputed in the data folder. If the previous section has been already run, one can comment the lines for already precomputed objects.

In [25]:
# Initialize Structural Alphabet trajectory handler
print("Initialize Structural Alphabet trajectory handler")

# Set seeds for reproducibility
seed = 42  # Replace with any integer
import random
np.random.seed(seed)
random.seed(seed)

# With randomization
sa_ntrc_active_100_1 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_100_2 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_active_100_3 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_inactive_100_1 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_100_2 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
sa_ntrc_inactive_100_3 = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])

sa_ntrc_active_100_1_3di = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["3DI"])
sa_ntrc_active_100_2_3di = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["3DI"])
sa_ntrc_active_100_3_3di = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["3DI"])

sa_ntrc_inactive_100_1_3di = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["3DI"])
sa_ntrc_inactive_100_2_3di = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["3DI"])
sa_ntrc_inactive_100_3_3di = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["3DI"])

# Load data
sa_ntrc_active_100_1.load_data("data_NtrC/encoded_NtrC_active_repl1_mk.sa")
sa_ntrc_active_100_2.load_data("data_NtrC/encoded_NtrC_active_repl2_mk.sa")
sa_ntrc_active_100_3.load_data("data_NtrC/encoded_NtrC_active_repl3_mk.sa")

sa_ntrc_inactive_100_1.load_data("data_NtrC/encoded_NtrC_inactive_repl1_mk.sa")
sa_ntrc_inactive_100_2.load_data("data_NtrC/encoded_NtrC_inactive_repl2_mk.sa")
sa_ntrc_inactive_100_3.load_data("data_NtrC/encoded_NtrC_inactive_repl3_mk.sa")

sa_ntrc_active_100_1_3di.load_data("data_NtrC/encoded_NtrC_active_repl1_3di.sa")
sa_ntrc_active_100_2_3di.load_data("data_NtrC/encoded_NtrC_active_repl2_3di.sa")
sa_ntrc_active_100_3_3di.load_data("data_NtrC/encoded_NtrC_active_repl3_3di.sa")

sa_ntrc_inactive_100_1_3di.load_data("data_NtrC/encoded_NtrC_inactive_repl1_3di.sa")
sa_ntrc_inactive_100_2_3di.load_data("data_NtrC/encoded_NtrC_inactive_repl2_3di.sa")
sa_ntrc_inactive_100_3_3di.load_data("data_NtrC/encoded_NtrC_inactive_repl3_3di.sa")
Initialize Structural Alphabet trajectory handler

Trajectory examination¶

One can examine the encoded structure string as well as all other analysis using the provided plotting functions.

Alternatively, one can addapt the provided plotting functions for other applications. They are all locate din the file Allohub_plots.py

To display the plots the argument action = "show" should be used while for saving to a file it should be action="save" If the save option is provided the name of the file can be specified with name="my_name.png". The format of the image will depend on the format specified in the file name.

In [26]:
# Plot the randomized trajectory of the Structural Alphabet M32K25 of NtrC active repl1
Allohub_plots.plot_SA_traj(sa_ntrc_active_100_1.get_int_traj(), SAtraj.ALPHABETS["M32K25"], action="show")
In [27]:
# Plot the randomized trajectory of the Structural Alphabet M32K25 of NtrC inactive repl1
Allohub_plots.plot_SA_traj(sa_ntrc_inactive_100_1.get_int_traj(), SAtraj.ALPHABETS["M32K25"], action="show")
In [28]:
# Plot the randomized trajectory of the Structural Alphabet 3DI of NtrC active repl1
Allohub_plots.plot_SA_traj(sa_ntrc_active_100_1_3di.get_int_traj(), SAtraj.ALPHABETS["3DI"], action="show")
In [29]:
# Plot the randomized trajectory of the Structural Alphabet 3DI of NtrC inactive repl1
Allohub_plots.plot_SA_traj(sa_ntrc_inactive_100_1_3di.get_int_traj(), SAtraj.ALPHABETS["3DI"], action="show")

From the trajectory plots alone, one can already appreciate differences. M32K25 captures clear differences around the fragments at positions ~90 and shows small difference on stable regions while the 3DI alphabet is better are capturing small difference in all the protein but does not capture as well the conformation change at positions ~90

Mutual information analysis¶

Now we will proceed to compute the mutual information for each block. Each trajectory will produce 9 blocks of 100ns each

In [30]:
print("Calculate the MI information")
# One can specify the number of workers to parallelize the process. max_workers=None would use all available resources.

mi_ntrc_inactive_100_1 = sa_ntrc_inactive_100_1.compute_mis(max_workers=7)
print("NtrC inactive repl 1 finished")
mi_ntrc_inactive_100_2 = sa_ntrc_inactive_100_2.compute_mis(max_workers=7)
print("NtrC inactive repl 2 finished")
mi_ntrc_inactive_100_3 = sa_ntrc_inactive_100_3.compute_mis(max_workers=7)
print("NtrC inactive repl 3 finished")

mi_ntrc_active_100_1 = sa_ntrc_active_100_1.compute_mis(max_workers=7)
print("NtrC active repl 1 finished")
mi_ntrc_active_100_2 = sa_ntrc_active_100_2.compute_mis(max_workers=7)
print("NtrC active repl 2 finished")
mi_ntrc_active_100_3 = sa_ntrc_active_100_3.compute_mis(max_workers=7)
print("NtrC active repl 3 finished")

mi_ntrc_inactive_100_1_3di = sa_ntrc_inactive_100_1_3di.compute_mis(max_workers=7)
print("NtrC inactive, 3di alphabet, repl 1 finished")
mi_ntrc_inactive_100_2_3di = sa_ntrc_inactive_100_2_3di.compute_mis(max_workers=7)
print("NtrC inactive, 3di alphabet, repl 2 finished")
mi_ntrc_inactive_100_3_3di = sa_ntrc_inactive_100_3_3di.compute_mis(max_workers=7)
print("NtrC inactive, 3di alphabet, repl 3 finished")

mi_ntrc_active_100_1_3di = sa_ntrc_active_100_1_3di.compute_mis(max_workers=7)
print("NtrC active, 3di alphabet, repl 1 finished")
mi_ntrc_active_100_2_3di = sa_ntrc_active_100_2_3di.compute_mis(max_workers=7)
print("NtrC active, 3di alphabet, repl 2 finished")
mi_ntrc_active_100_3_3di = sa_ntrc_active_100_3_3di.compute_mis(max_workers=7)
print("NtrC active, 3di alphabet, repl 3 finished")
Calculate the MI information
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00,  3.69s/block]
TRAJECTORY COMPLETED
NtrC inactive repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00,  3.34s/block]
TRAJECTORY COMPLETED
NtrC inactive repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:31<00:00,  3.55s/block]
TRAJECTORY COMPLETED
NtrC inactive repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00,  3.76s/block]
TRAJECTORY COMPLETED
NtrC active repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:33<00:00,  3.72s/block]
TRAJECTORY COMPLETED
NtrC active repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:29<00:00,  3.31s/block]
TRAJECTORY COMPLETED
NtrC active repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:26<00:00,  2.93s/block]
TRAJECTORY COMPLETED
NtrC inactive, 3di alphabet, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:28<00:00,  3.12s/block]
TRAJECTORY COMPLETED
NtrC inactive, 3di alphabet, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00,  3.35s/block]
TRAJECTORY COMPLETED
NtrC inactive, 3di alphabet, repl 3 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00,  3.39s/block]
TRAJECTORY COMPLETED
NtrC active, 3di alphabet, repl 1 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:24<00:00,  2.72s/block]
TRAJECTORY COMPLETED
NtrC active, 3di alphabet, repl 2 finished
ENCODING TRAJECTORY
Computing MI: 100%|██████████| 9/9 [00:30<00:00,  3.35s/block]
TRAJECTORY COMPLETED
NtrC active, 3di alphabet, repl 3 finished

Shanon entropy analysis¶

Shanon entropy of the fragments gives an idea of sturctural flexibility. In the case of the M32k25 alphabet this analysis is complementary to cartesian analysis such as RMSF since fragment entropy captures local changes regardless of magnitude of difference thanks to the alphabets being based on internal coordinates.

In [31]:
# Compute the shanon entropy
print("Compute the shanon entropy")

entropy_ntrc_active_1 = sa_ntrc_active_100_1.compute_entropy()
entropy_ntrc_active_2 = sa_ntrc_active_100_2.compute_entropy()
entropy_ntrc_active_3 = sa_ntrc_active_100_3.compute_entropy()

entropy_ntrc_inactive_1 = sa_ntrc_inactive_100_1.compute_entropy()
entropy_ntrc_inactive_2 = sa_ntrc_inactive_100_2.compute_entropy()
entropy_ntrc_inactive_3 = sa_ntrc_inactive_100_3.compute_entropy()

entropy_ntrc_active_1_3di = sa_ntrc_active_100_1_3di.compute_entropy()
entropy_ntrc_active_2_3di = sa_ntrc_active_100_2_3di.compute_entropy()
entropy_ntrc_active_3_3di = sa_ntrc_active_100_3_3di.compute_entropy()

entropy_ntrc_inactive_1_3di = sa_ntrc_inactive_100_1_3di.compute_entropy()
entropy_ntrc_inactive_2_3di = sa_ntrc_inactive_100_2_3di.compute_entropy()
entropy_ntrc_inactive_3_3di = sa_ntrc_inactive_100_3_3di.compute_entropy()
Compute the shanon entropy
In [32]:
# Plot the entropies of active NtrC with their standard deviations for the trajectories encoded with M32k25 alphabet.
Allohub_plots.plot_shanon_entropy_sd([entropy_ntrc_active_1, entropy_ntrc_active_2, entropy_ntrc_active_3], action="show", ylim=(0,4))
In [33]:
# Plot the entropies of inactive NtrC with their standard deviations for the trajectories encoded with M32k25 alphabet.
Allohub_plots.plot_shanon_entropy_sd([entropy_ntrc_inactive_1, entropy_ntrc_inactive_2, entropy_ntrc_inactive_3], action="show", ylim=(0,4))
In [34]:
# Plot the entropies of active NtrC with their standard deviations for the trajectories encoded with 3di alphabet.
Allohub_plots.plot_shanon_entropy_sd([entropy_ntrc_active_1_3di, entropy_ntrc_active_2_3di, entropy_ntrc_active_3_3di], action="show", ylim=(0,4))
In [35]:
# Plot the entropies of inactive NtrC with their standard deviations for the trajectories encoded with 3di alphabet.
Allohub_plots.plot_shanon_entropy_sd([entropy_ntrc_inactive_1_3di, entropy_ntrc_inactive_2_3di, entropy_ntrc_inactive_3_3di], action="show", ylim=(0,4))

The differences can be better observed by substracting the mean entropy arrays.

In [36]:
# Compute the mean entropy per condition for the M32K25 alphabet
mean_act = np.mean([np.array(entropy_ntrc_active_1), np.array(entropy_ntrc_active_2), np.array(entropy_ntrc_active_3)], axis=0)

mean_inact = np.mean([np.array(entropy_ntrc_inactive_1), np.array(entropy_ntrc_inactive_2), np.array(entropy_ntrc_inactive_3)], axis=0)

diff = mean_act - mean_inact
# Plot the mean entropy
Allohub_plots.plot_shanon_entropy(diff, ylim=(-3,2.5), action="show", name="mk_entro.svg")
In [37]:
# Compute the mean entropy per condition for the 3di alphabet
mean_act = np.mean([np.array(entropy_ntrc_active_1_3di), np.array(entropy_ntrc_active_2_3di), np.array(entropy_ntrc_active_3_3di)], axis=0)

mean_inact = np.mean([np.array(entropy_ntrc_inactive_1_3di), np.array(entropy_ntrc_inactive_2_3di), np.array(entropy_ntrc_inactive_3_3di)], axis=0)

diff = mean_act - mean_inact
# Plot the mean entropy
Allohub_plots.plot_shanon_entropy(diff, ylim=(-3,2.5), action="show", name="entro_3di.svg")

Positions around fragment 60 and 90 are known positions that undergo conformational and flexibility changes, suggesting that the approach correctly described this effect.

Analysis of allosteric Signal¶

Following the previous analysis, we will now extract important fragments and possible signaling path for the NtrC. For that we will use the trajectories encoded with M32K25, which managed to capture the conformation space changes with more sensitivity.

Eigenvector decomposition¶

The eigenvector decomposition of the obtained MI matrices can be used to asses convergence. The main motions of well converged simulations should have relatively high eigenvector overlap (>0.3).

Overlap between replicates can be used as a way to asses relaiability of the results. (Higher convergences = Higher confidences).

In [38]:
# Do an eigenvector decomposition of the matrices
from tqdm import tqdm

print("Do an eigenvector decomposition of the matrices")

for mi_tr in tqdm(mi_ntrc_active_100_1, desc="Eigenvector decomposition for NtrC active 1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_100_2, desc="Eigenvector decomposition for NtrC active 2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_active_100_3, desc="Eigenvector decomposition for NtrC active 3", unit="matrix"):
    mi_tr.compute_eigensystem()

for mi_tr in tqdm(mi_ntrc_inactive_100_1, desc="Eigenvector decomposition for NtrC inactive 1", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_100_2, desc="Eigenvector decomposition for NtrC inactive 2", unit="matrix"):
    mi_tr.compute_eigensystem()
for mi_tr in tqdm(mi_ntrc_inactive_100_3, desc="Eigenvector decomposition for NtrC inactive 3", unit="matrix"):
    mi_tr.compute_eigensystem()
Do an eigenvector decomposition of the matrices
Eigenvector decomposition for NtrC active 1: 100%|██████████| 9/9 [00:00<00:00, 141.42matrix/s]
Eigenvector decomposition for NtrC active 2: 100%|██████████| 9/9 [00:00<00:00, 121.60matrix/s]
Eigenvector decomposition for NtrC active 3: 100%|██████████| 9/9 [00:00<00:00, 200.72matrix/s]
Eigenvector decomposition for NtrC inactive 1: 100%|██████████| 9/9 [00:00<00:00, 492.82matrix/s]
Eigenvector decomposition for NtrC inactive 2: 100%|██████████| 9/9 [00:00<00:00, 428.64matrix/s]
Eigenvector decomposition for NtrC inactive 3: 100%|██████████| 9/9 [00:00<00:00, 243.11matrix/s]

Convergence analysis¶

Overlap can be now computed using the Overlap object.

In this analysis we are using the top 3 eigenvectors which should explain most of the observed variability.

In [39]:
# Create the overlap handler to compute similarities between the trajectories

# M32K25
overlap_100 = Overlap.Overlap([mi_ntrc_inactive_100_1, mi_ntrc_inactive_100_2, mi_ntrc_inactive_100_3,
                               mi_ntrc_active_100_1,  mi_ntrc_active_100_2, mi_ntrc_active_100_3], ev_list=[0,1,2])
overlap_100.fill_overlap_matrix()
In [40]:
# Convergence with alphabet M32K25
Allohub_plots.plot_overlap(overlap_100.get_overlap_matrix(), vmax=0.4, action="show")
In [41]:
# Compute similarities between overlap matrices
similarity_matrix_100 = overlap_100.compute_similarities()
SIMILARITIES BETWEEN TRAJECTORY 0  and 1
	Overlap between trajectories: 0.2601
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.5491
 Similary 0.7317
SIMILARITIES BETWEEN TRAJECTORY 0  and 2
	Overlap between trajectories: 0.2776
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.4596
 Similary 0.7939
SIMILARITIES BETWEEN TRAJECTORY 0  and 3
	Overlap between trajectories: 0.2513
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.5566
 Similary 0.7191
SIMILARITIES BETWEEN TRAJECTORY 0  and 4
	Overlap between trajectories: 0.2028
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.5372
 Similary 0.6803
SIMILARITIES BETWEEN TRAJECTORY 0  and 5
	Overlap between trajectories: 0.2106
	Overlap of trajectory 1 with itself: 0.5077
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.6988
SIMILARITIES BETWEEN TRAJECTORY 1  and 2
	Overlap between trajectories: 0.2559
	Overlap of trajectory 1 with itself: 0.5491
	Overlap of trajectory 2 with itself: 0.4596
 Similary 0.7515
SIMILARITIES BETWEEN TRAJECTORY 1  and 3
	Overlap between trajectories: 0.1944
	Overlap of trajectory 1 with itself: 0.5491
	Overlap of trajectory 2 with itself: 0.5566
 Similary 0.6416
SIMILARITIES BETWEEN TRAJECTORY 1  and 4
	Overlap between trajectories: 0.2403
	Overlap of trajectory 1 with itself: 0.5491
	Overlap of trajectory 2 with itself: 0.5372
 Similary 0.6971
SIMILARITIES BETWEEN TRAJECTORY 1  and 5
	Overlap between trajectories: 0.2074
	Overlap of trajectory 1 with itself: 0.5491
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.6749
SIMILARITIES BETWEEN TRAJECTORY 2  and 3
	Overlap between trajectories: 0.2186
	Overlap of trajectory 1 with itself: 0.4596
	Overlap of trajectory 2 with itself: 0.5566
 Similary 0.7105
SIMILARITIES BETWEEN TRAJECTORY 2  and 4
	Overlap between trajectories: 0.199
	Overlap of trajectory 1 with itself: 0.4596
	Overlap of trajectory 2 with itself: 0.5372
 Similary 0.7006
SIMILARITIES BETWEEN TRAJECTORY 2  and 5
	Overlap between trajectories: 0.1805
	Overlap of trajectory 1 with itself: 0.4596
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.6928
SIMILARITIES BETWEEN TRAJECTORY 3  and 4
	Overlap between trajectories: 0.2327
	Overlap of trajectory 1 with itself: 0.5566
	Overlap of trajectory 2 with itself: 0.5372
 Similary 0.6858
SIMILARITIES BETWEEN TRAJECTORY 3  and 5
	Overlap between trajectories: 0.2145
	Overlap of trajectory 1 with itself: 0.5566
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.6784
SIMILARITIES BETWEEN TRAJECTORY 4  and 5
	Overlap between trajectories: 0.2812
	Overlap of trajectory 1 with itself: 0.5372
	Overlap of trajectory 2 with itself: 0.5158
 Similary 0.7547
In [42]:
# Statistic significance of convergence for M32K25
# The groups are simulation 0,1,2 inactive 3,4,5 active
# The similarity_matrix contains the overlap between the indexed trajectories.
within_conditions = [similarity_matrix_100[0][1], similarity_matrix_100[0][2], similarity_matrix_100[1][2], 
                     similarity_matrix_100[3][4], similarity_matrix_100[3][5], similarity_matrix_100[4][5]]

between_conditions = [similarity_matrix_100[0][3], similarity_matrix_100[0][4], similarity_matrix_100[0][5], 
                      similarity_matrix_100[1][3], similarity_matrix_100[1][4], similarity_matrix_100[1][5],
                      similarity_matrix_100[2][3], similarity_matrix_100[2][4], similarity_matrix_100[2][5]]

tat, p_value = ttest_ind(within_conditions, between_conditions, equal_var=False, alternative='greater')

print(f"p-value of convergence within vs between conditions for NtrC with alphabet M32K25 is {p_value}")
p-value of convergence within vs between conditions for NtrC with alphabet M32K25 is 0.004525261989032756

The obtained p-value indicates that the convergence within replicates is signicantly different than the convergence between conditions, meaning that the analysis captured unique signal for this system.

Up and down regulated fragments detection¶

The next step is to find up and down regulated fragments. For that, one needs the mapping of trajectories, that is to which condition each simulation belongs.

In this case we have two conditions, inactive (0) and active (1).

The argument splitting controls wether the statistics should be compute using the mean mi matrices per replicate (splitting = False) or using all the mi matrices (splitting = True).

Using all mi matrices will show more coupled fragments but will produce a higher degree of false positives.

For this analysis we will use all mi matrices.

In [43]:
# Find upregulated and downregulated fragments
print("Find upregulated and downregulated fragments")

# The fold change of non correlated points would produce a division by zero runtime warning this warnings can be silenced as following:
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

updown_regulated_fragments = overlap_100.updown_regulation(traj_mapping=[0,0,0,1,1,1],splitting=True)

# The obtained dictionary has as keys the pairs of conditions. In this case (0,1).
# If more conditions were used one would have all the additional pairing (0,1), (0,2), (1,2) ....
t12_updown = updown_regulated_fragments[(0,1)]
Find upregulated and downregulated fragments
     FragmentPairs  log2FoldChange       PValues  AdjustedPValues
0           (0, 1)       -0.522549  1.339345e-02         0.034581
1           (0, 2)       -1.094437  9.572307e-07         0.000008
2           (0, 3)       -0.218183  2.442496e-01         0.361814
3           (0, 4)       -0.760571  5.631604e-03         0.016565
4           (0, 5)       -0.766811  1.257980e-03         0.004601
...            ...             ...           ...              ...
7255    (117, 119)        0.375417  3.133037e-01         0.434994
7256    (117, 120)        0.276478  3.662286e-01         0.487410
7257    (118, 119)        1.172490  2.578103e-02         0.059703
7258    (118, 120)        1.076641  2.037327e-02         0.049140
7259    (119, 120)       -0.398515  1.525957e-02         0.038520

[7260 rows x 4 columns]

Now we can filter the up and down regulated fragments based on a p value and fold change

In [44]:
# Filtering parameters
pval_threshold = 0.01
fold_change_threshold = 2

# First extract individual fragments for ease of access
fragment_pairs = t12_updown["FragmentPairs"]
f1 = [x[0] for x in fragment_pairs]
f2 = [x[1] for x in fragment_pairs]
t12_updown["f1"] = f1
t12_updown["f2"] = f2

# Now extract significant fragments
significant_fragments = t12_updown[t12_updown['AdjustedPValues'] < pval_threshold]

# Second, filter by fold change and print top 10
up_reg = significant_fragments[significant_fragments['log2FoldChange'] > fold_change_threshold].sort_values('log2FoldChange', ascending=False)
up_reg.head(10)
Out[44]:
FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
1776 (15, 97) 8.568563 1.143804e-12 3.533624e-11 15 97
2865 (26, 97) 5.910179 5.701885e-21 1.799812e-18 26 97
1131 (9, 97) 4.551334 1.376245e-14 6.404831e-13 9 97
6801 (90, 97) 4.501422 2.943828e-13 1.037485e-11 90 97
1020 (8, 97) 4.158793 3.712417e-10 6.224514e-09 8 97
6290 (76, 97) 3.868711 2.552682e-21 9.266236e-19 76 97
6771 (89, 97) 3.846886 9.854762e-24 7.949508e-21 89 97
6702 (87, 91) 3.780066 5.791781e-11 1.161556e-09 87 91
6734 (88, 91) 3.757264 1.472253e-18 2.274161e-16 88 91
6858 (92, 97) 3.700540 1.155911e-22 5.994223e-20 92 97
In [45]:
# Show top 10 Down-regulated fragments
down_reg = significant_fragments[significant_fragments['log2FoldChange'] < -fold_change_threshold].sort_values('log2FoldChange')
down_reg.head(10)
Out[45]:
FragmentPairs log2FoldChange PValues AdjustedPValues f1 f2
7000 (97, 114) -7.220985 2.519139e-04 1.123400e-03 97 114
6998 (97, 112) -5.651724 1.822613e-03 6.364679e-03 97 112
1705 (15, 26) -4.384441 1.095541e-03 4.095587e-03 15 26
693 (5, 109) -4.013189 4.345503e-06 3.111277e-05 5 109
5608 (63, 65) -3.802584 9.350739e-08 9.558103e-07 63 65
7095 (102, 109) -3.725565 2.503383e-03 8.356120e-03 102 109
2592 (23, 109) -3.657752 3.001072e-04 1.307790e-03 23 109
299 (2, 63) -3.640988 8.256406e-17 7.309940e-15 2 63
5702 (64, 103) -3.471873 8.752932e-11 1.667881e-09 64 103
5699 (64, 100) -3.443561 8.360845e-08 8.659020e-07 64 100
In [46]:
# Most frequent fragments
top_fragments_count = {}

for fragment_pair in up_reg["FragmentPairs"]:
    top_fragments_count.setdefault(fragment_pair[0], 0) # record first fragment of the pair
    top_fragments_count.setdefault(fragment_pair[1], 0) # record second fragment of the pair
    top_fragments_count[fragment_pair[0]] += 1
    top_fragments_count[fragment_pair[1]] += 1

for fragment_pair in down_reg["FragmentPairs"]:
    top_fragments_count.setdefault(fragment_pair[0], 0) # record first fragment of the pair
    top_fragments_count.setdefault(fragment_pair[1], 0) # record second fragment of the pair
    top_fragments_count[fragment_pair[0]] += 1
    top_fragments_count[fragment_pair[1]] += 1

# sort based on counts
top_fragments_count = dict(sorted(top_fragments_count.items(), key=lambda item: item[1], reverse=True))

# Print top 10 most appearing fragments
dict_keys = list(top_fragments_count.keys())
for i in range(10):
    frag = dict_keys[i]
    print(f"Fragment {frag} appears {top_fragments_count[frag]} times.")
Fragment 64 appears 62 times.
Fragment 63 appears 60 times.
Fragment 90 appears 58 times.
Fragment 91 appears 57 times.
Fragment 89 appears 56 times.
Fragment 109 appears 49 times.
Fragment 97 appears 46 times.
Fragment 92 appears 27 times.
Fragment 88 appears 23 times.
Fragment 93 appears 14 times.

Interpretation

One can also create a volcano plot of the data to visualize the results

In [47]:
# Plot volcano plot of Up and Down regulated fragments
Allohub_plots.plot_updownregulation(t12_updown,  fold_threshold=fold_change_threshold, ylim=25, pvalue_threshold=pval_threshold, action="show")
In [64]:
# Check the pair 82 - 101
sorted_significant_fragments = significant_fragments.sort_values(by="log2FoldChange", key=abs, ascending=False).reset_index(drop=False)

print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (80,99)]) # Fragments that contain residue 82 would be 79-82
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (81,99)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (82,99)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (80,100)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (81,100)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (82,100)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (80,101)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (81,101)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (82,101)])
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
      index FragmentPairs  log2FoldChange   PValues  AdjustedPValues  f1  f2
1900   6497      (81, 99)       -0.888099  0.000174         0.000818  81  99
      index FragmentPairs  log2FoldChange   PValues  AdjustedPValues  f1  f2
2147   6535      (82, 99)       -0.650953  0.001518         0.005428  82  99
      index FragmentPairs  log2FoldChange  PValues  AdjustedPValues  f1   f2
2184   6459     (80, 100)       -0.564555  0.00265         0.008773  80  100
      index FragmentPairs  log2FoldChange   PValues  AdjustedPValues  f1   f2
1748   6498     (81, 100)       -1.003776  0.000003         0.000024  81  100
      index FragmentPairs  log2FoldChange  PValues  AdjustedPValues  f1   f2
1624   6536     (82, 100)       -1.082251  0.00002         0.000124  82  100
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
In [65]:
# Check the pair 66 - 99
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (64,97)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (65,97)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (66,97)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (64,98)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (65,98)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (66,98)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (64,99)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (65,99)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (66,99)])

# check  67 - (95,96)
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (65,94)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (66,94)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (67,94)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (65,95)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (66,95)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (67,95)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (65,96)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (66,96)])
print(sorted_significant_fragments[sorted_significant_fragments["FragmentPairs"] == (67,96)])
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
     index FragmentPairs  log2FoldChange       PValues  AdjustedPValues  f1  \
484   5697      (64, 98)         -2.0426  7.758154e-12     1.955701e-10  64   

     f2  
484  98  
      index FragmentPairs  log2FoldChange  PValues  AdjustedPValues  f1  f2
1108   5752      (65, 98)         -1.4107  0.00002         0.000124  65  98
      index FragmentPairs  log2FoldChange   PValues  AdjustedPValues  f1  f2
1087   5806      (66, 98)       -1.422074  0.000374         0.001587  66  98
     index FragmentPairs  log2FoldChange       PValues  AdjustedPValues  f1  \
307   5698      (64, 99)       -2.308837  2.270552e-10     3.981693e-09  64   

     f2  
307  99  
     index FragmentPairs  log2FoldChange       PValues  AdjustedPValues  f1  \
460   5753      (65, 99)       -2.076101  1.504028e-11     3.455458e-10  65   

     f2  
460  99  
     index FragmentPairs  log2FoldChange       PValues  AdjustedPValues  f1  \
953   5807      (66, 99)       -1.546874  1.110615e-09     1.704665e-08  66   

     f2  
953  99  
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
     index FragmentPairs  log2FoldChange   PValues  AdjustedPValues  f1  f2
823   5802      (66, 94)         -1.6697  0.001409         0.005079  66  94
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
      index FragmentPairs  log2FoldChange   PValues  AdjustedPValues  f1  f2
1578   5749      (65, 95)       -1.108277  0.000009         0.000062  65  95
      index FragmentPairs  log2FoldChange   PValues  AdjustedPValues  f1  f2
1119   5803      (66, 95)        -1.40584  0.000331         0.001424  66  95
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []
      index FragmentPairs  log2FoldChange   PValues  AdjustedPValues  f1  f2
1131   5750      (65, 96)       -1.395014  0.000071         0.000372  65  96
     index FragmentPairs  log2FoldChange   PValues  AdjustedPValues  f1  f2
596   5804      (66, 96)       -1.923141  0.000133         0.000646  66  96
Empty DataFrame
Columns: [index, FragmentPairs, log2FoldChange, PValues, AdjustedPValues, f1, f2]
Index: []

Due to the 4 aminoaciod size nautre of the fragments, important fragments have to be seen as hubs of key residues rather than plain positions. To try to Disambiguate the aminoa acids with higher weight in the top fragments we will use Protein Language Models in the next section.

From the top differentialy regulated fragments we find positions with key reported residues involved in the activation of the protein. The previously reported Y101 - T82 switch appears in the significant hits as well as other important reported structural differences such as position ~99 and ~94 with the key structural switches of residues F99 and Y94 respectively. Interestingly, F99 and Y94 appear more consistently highlighting their higher importance as a activation characteristic.

On the top fragment pairs hits we find previously reported relevant structural contacts, such as the K(67) - Q(96) - Q(95) (KQQ motif) and the F99-L66 pair. Other residues from the β3−α3 loop(residue indices 60−63) and from the N-terminal half of α4 (residues indices 85−90)also appear among the most important parts, matching with the NMR experiments performed for this system.

Disambiguation of Fragments¶

To find which might be the most important residues for a given fragment, one can use PLM such as ESM2-650M and use the predicted likelihoods as a guide. The next section will ilustrate how to do that. First the distances between c alphas need to be calculated, after that the important residues can be extracted with the SAPLM module.

In [102]:
# Compute distances between c alphas from a pdb
import mdtraj as md

# Load trajectory
traj = md.load("data_NtrC/NtrC_active_nowat.pdb")

# Select only the C-alpha atoms
ca_indices = traj.topology.select('name CA')


# Extract the coordinates of C-alpha atoms
ca_positions = traj.xyz[0][ca_indices]

# Number of points
n_points = ca_positions.shape[0]

# Initialize an empty distance matrix
distance_matrix = np.zeros((n_points, n_points))

# Compute the pairwise distances using loops
for i in range(n_points):
    for j in range(n_points):
        # Compute Euclidean distance between points i and j
        distance_matrix[i, j] = np.sqrt(np.sum((ca_positions[i] - ca_positions[j])**2))

# Convert to angstroms
distance_matrix *= 10
In [82]:
from Allohubpy import SAPLM
# The sequence should match the one used in the simulations
traj = md.load("data_NtrC/NtrC_active_nowat.pdb")

# Create a subset trajectory containing only the protein
protein_traj = traj.atom_slice(traj.topology.select("protein"))

ntrc_sequence = ''.join([str(residue.code) for residue in protein_traj.topology.residues])


# Create the PLM handler. The fragment size for alphabet M32k25 is 4
esm_handler = SAPLM.SAPLM(fragment_size = 4)
esm_handler.set_sequence(ntrc_sequence)

# Extract likelihoods for top most appearing fragments
likelihood_df = esm_handler.fragment_likelihoods(fragment=64, offset=0)
print(likelihood_df)

likelihood_df = esm_handler.fragment_likelihoods(fragment=63, offset=0)
print(likelihood_df)

likelihood_df = esm_handler.fragment_likelihoods(fragment=90, offset=0)
print(likelihood_df)

likelihood_df = esm_handler.fragment_likelihoods(fragment=91, offset=0)
print(likelihood_df)

likelihood_df = esm_handler.fragment_likelihoods(fragment=89, offset=0)
print(likelihood_df)

likelihood_df = esm_handler.fragment_likelihoods(fragment=109, offset=0)
print(likelihood_df)

likelihood_df = esm_handler.fragment_likelihoods(fragment=97, offset=0)
print(likelihood_df)

likelihood_df = esm_handler.fragment_likelihoods(fragment=92, offset=0)
print(likelihood_df)

likelihood_df = esm_handler.fragment_likelihoods(fragment=88, offset=0)
print(likelihood_df)

likelihood_df = esm_handler.fragment_likelihoods(fragment=93, offset=0)
print(likelihood_df)
   Amino acid position Amino acid  Likelihood
0                   65          L    0.997173
1                   66          L    0.994718
2                   67          K    0.739040
3                   68          Q    0.889384
   Amino acid position Amino acid  Likelihood
0                   64          A    0.887619
1                   65          L    0.997173
2                   66          L    0.994718
3                   67          K    0.739040
   Amino acid position Amino acid  Likelihood
0                   91          V    0.699971
1                   92          S    0.796207
2                   93          A    0.990187
3                   94          Y    0.023101
   Amino acid position Amino acid  Likelihood
0                   92          S    0.796207
1                   93          A    0.990187
2                   94          Y    0.023101
3                   95          Q    0.803867
   Amino acid position Amino acid  Likelihood
0                   90          A    0.992293
1                   91          V    0.699971
2                   92          S    0.796207
3                   93          A    0.990187
   Amino acid position Amino acid  Likelihood
0                  110          E    0.669687
1                  111          A    0.648538
2                  112          V    0.874420
3                  113          A    0.853307
   Amino acid position Amino acid  Likelihood
0                   98          A    0.997481
1                   99          F    0.970710
2                  100          D    0.952278
3                  101          Y    0.927816
   Amino acid position Amino acid  Likelihood
0                   93          A    0.990187
1                   94          Y    0.023101
2                   95          Q    0.803867
3                   96          Q    0.791222
   Amino acid position Amino acid  Likelihood
0                   89          A    0.742131
1                   90          A    0.992293
2                   91          V    0.699971
3                   92          S    0.796207
   Amino acid position Amino acid  Likelihood
0                   94          Y    0.023101
1                   95          Q    0.803867
2                   96          Q    0.791222
3                   97          G    0.996706

Graph Analysis¶

Finally, we can examine possible comuniaction path between the phosphorilation site D54 which induces the shift to the active conformation of NtrC to positions found in the signaling surface that were picked up as significant in the previous steps (88-100).

In [110]:
importlib.reload(SANetwork)
# We are interested in the signal transmition from D54
SAgraph = SANetwork.SANetWork(traj_list= mi_ntrc_active_100_1 + mi_ntrc_active_100_2 + mi_ntrc_active_100_3, distance_limit=7)

# Load the distances. The fragment size for M32k25 is 4.
SAgraph.set_distance_matrix(matrix=distance_matrix, fragment_size=4)

# The pval_threshold filters coupled pairs with low significance
SAgraph.create_graph(threshold=50)

# list of fragments of interest
start = [54,55,56]
end = [i for i in range(88,101)]

# Subgraph is a networkx object with the nodes and edges of the shortest paths connecting those residues
# Shortest_pathd ans shortest_distances are list of the shortest paths and their distances respectively.
# z_score provides an estimate of how statistically coupled the two sites are
subgraph, shortest_paths, shortest_distances, z_score = SAgraph.identify_preferential_connections(start_fragments=start, end_fragments=end)

# Create representation of the graph
Allohub_plots.plot_SA_graph(subgraph, start, end, action="show")